Commit | Line | Data |
---|---|---|
caab277b | 1 | // SPDX-License-Identifier: GPL-2.0-only |
9031fefd | 2 | /* |
0d747f65 | 3 | * VDSO implementations. |
9031fefd WD |
4 | * |
5 | * Copyright (C) 2012 ARM Limited | |
6 | * | |
9031fefd WD |
7 | * Author: Will Deacon <will.deacon@arm.com> |
8 | */ | |
9 | ||
5a9e3e15 | 10 | #include <linux/cache.h> |
9031fefd WD |
11 | #include <linux/clocksource.h> |
12 | #include <linux/elf.h> | |
13 | #include <linux/err.h> | |
14 | #include <linux/errno.h> | |
15 | #include <linux/gfp.h> | |
5a9e3e15 | 16 | #include <linux/kernel.h> |
9031fefd WD |
17 | #include <linux/mm.h> |
18 | #include <linux/sched.h> | |
19 | #include <linux/signal.h> | |
20 | #include <linux/slab.h> | |
ee3cda8e | 21 | #include <linux/time_namespace.h> |
c60b0c28 | 22 | #include <linux/timekeeper_internal.h> |
9031fefd | 23 | #include <linux/vmalloc.h> |
28b1a824 VF |
24 | #include <vdso/datapage.h> |
25 | #include <vdso/helpers.h> | |
26 | #include <vdso/vsyscall.h> | |
9031fefd WD |
27 | |
28 | #include <asm/cacheflush.h> | |
29 | #include <asm/signal32.h> | |
30 | #include <asm/vdso.h> | |
9031fefd | 31 | |
d3418f38 MR |
/* Which userspace ABI a vDSO image serves. */
enum vdso_abi {
	VDSO_ABI_AA64,	/* native AArch64 vDSO */
	VDSO_ABI_AA32,	/* compat AArch32 vDSO */
};
c7aa2d71 | 36 | |
3503d56c AV |
/* Page offsets inside the vvar special mapping; count must match __VVAR_PAGES. */
enum vvar_pages {
	VVAR_DATA_PAGE_OFFSET,
	VVAR_TIMENS_PAGE_OFFSET,
	VVAR_NR_PAGES,
};
42 | ||
/*
 * Per-ABI description of a vDSO image: where its code lives in the kernel
 * image, how many pages it spans, and the special mappings used to expose
 * it to userspace.
 */
struct vdso_abi_info {
	const char *name;
	const char *vdso_code_start;	/* start of the embedded vDSO blob */
	const char *vdso_code_end;	/* end of the embedded vDSO blob */
	unsigned long vdso_pages;	/* code size in pages */
	/* Data Mapping */
	struct vm_special_mapping *dm;
	/* Code Mapping */
	struct vm_special_mapping *cm;
};
53 | ||
d3418f38 MR |
54 | static struct vdso_abi_info vdso_info[] __ro_after_init = { |
55 | [VDSO_ABI_AA64] = { | |
c7aa2d71 VF |
56 | .name = "vdso", |
57 | .vdso_code_start = vdso_start, | |
58 | .vdso_code_end = vdso_end, | |
59 | }, | |
7c1deeeb | 60 | #ifdef CONFIG_COMPAT_VDSO |
d3418f38 | 61 | [VDSO_ABI_AA32] = { |
7c1deeeb VF |
62 | .name = "vdso32", |
63 | .vdso_code_start = vdso32_start, | |
64 | .vdso_code_end = vdso32_end, | |
65 | }, | |
66 | #endif /* CONFIG_COMPAT_VDSO */ | |
c7aa2d71 | 67 | }; |
9031fefd WD |
68 | |
69 | /* | |
70 | * The vDSO data page. | |
71 | */ | |
72 | static union { | |
28b1a824 | 73 | struct vdso_data data[CS_BASES]; |
9031fefd WD |
74 | u8 page[PAGE_SIZE]; |
75 | } vdso_data_store __page_aligned_data; | |
28b1a824 | 76 | struct vdso_data *vdso_data = vdso_data_store.data; |
9031fefd | 77 | |
871402e0 DS |
78 | static int vdso_mremap(const struct vm_special_mapping *sm, |
79 | struct vm_area_struct *new_vma) | |
c7aa2d71 | 80 | { |
c7aa2d71 VF |
81 | current->mm->context.vdso = (void *)new_vma->vm_start; |
82 | ||
83 | return 0; | |
84 | } | |
85 | ||
a7dcf58a | 86 | static int __init __vdso_init(enum vdso_abi abi) |
c7aa2d71 VF |
87 | { |
88 | int i; | |
89 | struct page **vdso_pagelist; | |
90 | unsigned long pfn; | |
91 | ||
d3418f38 | 92 | if (memcmp(vdso_info[abi].vdso_code_start, "\177ELF", 4)) { |
c7aa2d71 VF |
93 | pr_err("vDSO is not a valid ELF object!\n"); |
94 | return -EINVAL; | |
95 | } | |
96 | ||
d3418f38 MR |
97 | vdso_info[abi].vdso_pages = ( |
98 | vdso_info[abi].vdso_code_end - | |
99 | vdso_info[abi].vdso_code_start) >> | |
c7aa2d71 VF |
100 | PAGE_SHIFT; |
101 | ||
d53b5c01 | 102 | vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages, |
c7aa2d71 VF |
103 | sizeof(struct page *), |
104 | GFP_KERNEL); | |
105 | if (vdso_pagelist == NULL) | |
106 | return -ENOMEM; | |
107 | ||
c7aa2d71 | 108 | /* Grab the vDSO code pages. */ |
d3418f38 | 109 | pfn = sym_to_pfn(vdso_info[abi].vdso_code_start); |
c7aa2d71 | 110 | |
d3418f38 | 111 | for (i = 0; i < vdso_info[abi].vdso_pages; i++) |
d53b5c01 | 112 | vdso_pagelist[i] = pfn_to_page(pfn + i); |
c7aa2d71 | 113 | |
d53b5c01 | 114 | vdso_info[abi].cm->pages = vdso_pagelist; |
c7aa2d71 VF |
115 | |
116 | return 0; | |
117 | } | |
118 | ||
1b6867d2 | 119 | #ifdef CONFIG_TIME_NS |
3503d56c AV |
120 | struct vdso_data *arch_get_vdso_data(void *vvar_page) |
121 | { | |
122 | return (struct vdso_data *)(vvar_page); | |
123 | } | |
124 | ||
1b6867d2 AV |
125 | /* |
126 | * The vvar mapping contains data for a specific time namespace, so when a task | |
127 | * changes namespace we must unmap its vvar data for the old namespace. | |
128 | * Subsequent faults will map in data for the new namespace. | |
129 | * | |
130 | * For more details see timens_setup_vdso_data(). | |
131 | */ | |
132 | int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) | |
133 | { | |
134 | struct mm_struct *mm = task->mm; | |
135 | struct vm_area_struct *vma; | |
136 | ||
137 | mmap_read_lock(mm); | |
138 | ||
139 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | |
140 | unsigned long size = vma->vm_end - vma->vm_start; | |
141 | ||
142 | if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm)) | |
143 | zap_page_range(vma, vma->vm_start, size); | |
144 | #ifdef CONFIG_COMPAT_VDSO | |
145 | if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA32].dm)) | |
146 | zap_page_range(vma, vma->vm_start, size); | |
147 | #endif | |
148 | } | |
149 | ||
150 | mmap_read_unlock(mm); | |
151 | return 0; | |
152 | } | |
ee3cda8e AV |
153 | |
154 | static struct page *find_timens_vvar_page(struct vm_area_struct *vma) | |
155 | { | |
156 | if (likely(vma->vm_mm == current->mm)) | |
157 | return current->nsproxy->time_ns->vvar_page; | |
158 | ||
159 | /* | |
160 | * VM_PFNMAP | VM_IO protect .fault() handler from being called | |
161 | * through interfaces like /proc/$pid/mem or | |
162 | * process_vm_{readv,writev}() as long as there's no .access() | |
163 | * in special_mapping_vmops. | |
164 | * For more details check_vma_flags() and __access_remote_vm() | |
165 | */ | |
166 | WARN(1, "vvar_page accessed remotely"); | |
167 | ||
168 | return NULL; | |
169 | } | |
170 | #else | |
171 | static struct page *find_timens_vvar_page(struct vm_area_struct *vma) | |
172 | { | |
173 | return NULL; | |
174 | } | |
1b6867d2 AV |
175 | #endif |
176 | ||
d53b5c01 AV |
177 | static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, |
178 | struct vm_area_struct *vma, struct vm_fault *vmf) | |
179 | { | |
ee3cda8e AV |
180 | struct page *timens_page = find_timens_vvar_page(vma); |
181 | unsigned long pfn; | |
182 | ||
183 | switch (vmf->pgoff) { | |
184 | case VVAR_DATA_PAGE_OFFSET: | |
185 | if (timens_page) | |
186 | pfn = page_to_pfn(timens_page); | |
187 | else | |
188 | pfn = sym_to_pfn(vdso_data); | |
189 | break; | |
190 | #ifdef CONFIG_TIME_NS | |
191 | case VVAR_TIMENS_PAGE_OFFSET: | |
192 | /* | |
193 | * If a task belongs to a time namespace then a namespace | |
194 | * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and | |
195 | * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET | |
196 | * offset. | |
197 | * See also the comment near timens_setup_vdso_data(). | |
198 | */ | |
199 | if (!timens_page) | |
200 | return VM_FAULT_SIGBUS; | |
201 | pfn = sym_to_pfn(vdso_data); | |
202 | break; | |
203 | #endif /* CONFIG_TIME_NS */ | |
204 | default: | |
205 | return VM_FAULT_SIGBUS; | |
206 | } | |
207 | ||
208 | return vmf_insert_pfn(vma, vmf->address, pfn); | |
d53b5c01 AV |
209 | } |
210 | ||
d3418f38 | 211 | static int __setup_additional_pages(enum vdso_abi abi, |
c7aa2d71 VF |
212 | struct mm_struct *mm, |
213 | struct linux_binprm *bprm, | |
214 | int uses_interp) | |
215 | { | |
216 | unsigned long vdso_base, vdso_text_len, vdso_mapping_len; | |
bf740a90 | 217 | unsigned long gp_flags = 0; |
c7aa2d71 VF |
218 | void *ret; |
219 | ||
3503d56c AV |
220 | BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); |
221 | ||
d3418f38 | 222 | vdso_text_len = vdso_info[abi].vdso_pages << PAGE_SHIFT; |
c7aa2d71 | 223 | /* Be sure to map the data page */ |
3503d56c | 224 | vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE; |
c7aa2d71 VF |
225 | |
226 | vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); | |
227 | if (IS_ERR_VALUE(vdso_base)) { | |
228 | ret = ERR_PTR(vdso_base); | |
229 | goto up_fail; | |
230 | } | |
231 | ||
3503d56c | 232 | ret = _install_special_mapping(mm, vdso_base, VVAR_NR_PAGES * PAGE_SIZE, |
d53b5c01 | 233 | VM_READ|VM_MAYREAD|VM_PFNMAP, |
d3418f38 | 234 | vdso_info[abi].dm); |
c7aa2d71 VF |
235 | if (IS_ERR(ret)) |
236 | goto up_fail; | |
237 | ||
bf740a90 MB |
238 | if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && system_supports_bti()) |
239 | gp_flags = VM_ARM64_BTI; | |
240 | ||
3503d56c | 241 | vdso_base += VVAR_NR_PAGES * PAGE_SIZE; |
c7aa2d71 VF |
242 | mm->context.vdso = (void *)vdso_base; |
243 | ret = _install_special_mapping(mm, vdso_base, vdso_text_len, | |
bf740a90 | 244 | VM_READ|VM_EXEC|gp_flags| |
c7aa2d71 | 245 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, |
d3418f38 | 246 | vdso_info[abi].cm); |
c7aa2d71 VF |
247 | if (IS_ERR(ret)) |
248 | goto up_fail; | |
249 | ||
250 | return 0; | |
251 | ||
252 | up_fail: | |
253 | mm->context.vdso = NULL; | |
254 | return PTR_ERR(ret); | |
255 | } | |
256 | ||
9031fefd WD |
257 | #ifdef CONFIG_COMPAT |
/*
 * Create and map the vectors page for AArch32 tasks.
 */
enum aarch32_map {
	AA32_MAP_VECTORS, /* kuser helpers */
	AA32_MAP_SIGPAGE,
	AA32_MAP_VVAR,
	AA32_MAP_VDSO,
};
74fc72e7 MR |
267 | |
268 | static struct page *aarch32_vectors_page __ro_after_init; | |
74fc72e7 | 269 | static struct page *aarch32_sig_page __ro_after_init; |
74fc72e7 | 270 | |
7adbf10e WD |
271 | static int aarch32_sigpage_mremap(const struct vm_special_mapping *sm, |
272 | struct vm_area_struct *new_vma) | |
273 | { | |
274 | current->mm->context.sigpage = (void *)new_vma->vm_start; | |
275 | ||
276 | return 0; | |
277 | } | |
278 | ||
1d09094a MR |
279 | static struct vm_special_mapping aarch32_vdso_maps[] = { |
280 | [AA32_MAP_VECTORS] = { | |
0d747f65 | 281 | .name = "[vectors]", /* ABI */ |
74fc72e7 | 282 | .pages = &aarch32_vectors_page, |
0d747f65 | 283 | }, |
0cbc2659 WD |
284 | [AA32_MAP_SIGPAGE] = { |
285 | .name = "[sigpage]", /* ABI */ | |
286 | .pages = &aarch32_sig_page, | |
7adbf10e | 287 | .mremap = aarch32_sigpage_mremap, |
0cbc2659 | 288 | }, |
1d09094a | 289 | [AA32_MAP_VVAR] = { |
7c1deeeb | 290 | .name = "[vvar]", |
d53b5c01 | 291 | .fault = vvar_fault, |
7c1deeeb | 292 | }, |
1d09094a | 293 | [AA32_MAP_VDSO] = { |
7c1deeeb | 294 | .name = "[vdso]", |
871402e0 | 295 | .mremap = vdso_mremap, |
7c1deeeb | 296 | }, |
0d747f65 | 297 | }; |
9031fefd | 298 | |
1255a734 | 299 | static int aarch32_alloc_kuser_vdso_page(void) |
9031fefd WD |
300 | { |
301 | extern char __kuser_helper_start[], __kuser_helper_end[]; | |
302 | int kuser_sz = __kuser_helper_end - __kuser_helper_start; | |
1255a734 | 303 | unsigned long vdso_page; |
9031fefd | 304 | |
af1b3cf2 VF |
305 | if (!IS_ENABLED(CONFIG_KUSER_HELPERS)) |
306 | return 0; | |
307 | ||
7cd6ca1d | 308 | vdso_page = get_zeroed_page(GFP_KERNEL); |
1255a734 | 309 | if (!vdso_page) |
0d747f65 | 310 | return -ENOMEM; |
9031fefd | 311 | |
1255a734 | 312 | memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start, |
0d747f65 | 313 | kuser_sz); |
74fc72e7 | 314 | aarch32_vectors_page = virt_to_page(vdso_page); |
1255a734 VF |
315 | return 0; |
316 | } | |
317 | ||
6e554abd | 318 | #define COMPAT_SIGPAGE_POISON_WORD 0xe7fddef1 |
a39060b0 | 319 | static int aarch32_alloc_sigpage(void) |
1255a734 VF |
320 | { |
321 | extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; | |
322 | int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; | |
6e554abd WD |
323 | __le32 poison = cpu_to_le32(COMPAT_SIGPAGE_POISON_WORD); |
324 | void *sigpage; | |
9031fefd | 325 | |
6e554abd | 326 | sigpage = (void *)__get_free_page(GFP_KERNEL); |
1255a734 VF |
327 | if (!sigpage) |
328 | return -ENOMEM; | |
0d747f65 | 329 | |
6e554abd WD |
330 | memset32(sigpage, (__force u32)poison, PAGE_SIZE / sizeof(poison)); |
331 | memcpy(sigpage, __aarch32_sigret_code_start, sigret_sz); | |
74fc72e7 | 332 | aarch32_sig_page = virt_to_page(sigpage); |
a39060b0 WD |
333 | return 0; |
334 | } | |
9031fefd | 335 | |
a7dcf58a | 336 | static int __init __aarch32_alloc_vdso_pages(void) |
a39060b0 | 337 | { |
0cbc2659 WD |
338 | |
339 | if (!IS_ENABLED(CONFIG_COMPAT_VDSO)) | |
340 | return 0; | |
341 | ||
a39060b0 WD |
342 | vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR]; |
343 | vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO]; | |
9031fefd | 344 | |
a39060b0 | 345 | return __vdso_init(VDSO_ABI_AA32); |
9031fefd | 346 | } |
7c1deeeb VF |
347 | |
348 | static int __init aarch32_alloc_vdso_pages(void) | |
349 | { | |
a39060b0 WD |
350 | int ret; |
351 | ||
a39060b0 WD |
352 | ret = __aarch32_alloc_vdso_pages(); |
353 | if (ret) | |
354 | return ret; | |
a39060b0 WD |
355 | |
356 | ret = aarch32_alloc_sigpage(); | |
357 | if (ret) | |
358 | return ret; | |
359 | ||
360 | return aarch32_alloc_kuser_vdso_page(); | |
7c1deeeb | 361 | } |
0d747f65 | 362 | arch_initcall(aarch32_alloc_vdso_pages); |
9031fefd | 363 | |
0d747f65 | 364 | static int aarch32_kuser_helpers_setup(struct mm_struct *mm) |
9031fefd | 365 | { |
0d747f65 | 366 | void *ret; |
2fea7f6c | 367 | |
af1b3cf2 VF |
368 | if (!IS_ENABLED(CONFIG_KUSER_HELPERS)) |
369 | return 0; | |
370 | ||
0d747f65 VF |
371 | /* |
372 | * Avoid VM_MAYWRITE for compatibility with arch/arm/, where it's | |
373 | * not safe to CoW the page containing the CPU exception vectors. | |
374 | */ | |
375 | ret = _install_special_mapping(mm, AARCH32_VECTORS_BASE, PAGE_SIZE, | |
376 | VM_READ | VM_EXEC | | |
377 | VM_MAYREAD | VM_MAYEXEC, | |
1d09094a | 378 | &aarch32_vdso_maps[AA32_MAP_VECTORS]); |
0d747f65 VF |
379 | |
380 | return PTR_ERR_OR_ZERO(ret); | |
381 | } | |
382 | ||
383 | static int aarch32_sigreturn_setup(struct mm_struct *mm) | |
384 | { | |
385 | unsigned long addr; | |
2fea7f6c | 386 | void *ret; |
9031fefd | 387 | |
0d747f65 VF |
388 | addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); |
389 | if (IS_ERR_VALUE(addr)) { | |
390 | ret = ERR_PTR(addr); | |
391 | goto out; | |
392 | } | |
9031fefd | 393 | |
0d747f65 VF |
394 | /* |
395 | * VM_MAYWRITE is required to allow gdb to Copy-on-Write and | |
396 | * set breakpoints. | |
397 | */ | |
2fea7f6c | 398 | ret = _install_special_mapping(mm, addr, PAGE_SIZE, |
0d747f65 VF |
399 | VM_READ | VM_EXEC | VM_MAYREAD | |
400 | VM_MAYWRITE | VM_MAYEXEC, | |
1d09094a | 401 | &aarch32_vdso_maps[AA32_MAP_SIGPAGE]); |
0d747f65 VF |
402 | if (IS_ERR(ret)) |
403 | goto out; | |
9031fefd | 404 | |
a39060b0 | 405 | mm->context.sigpage = (void *)addr; |
9031fefd | 406 | |
0d747f65 | 407 | out: |
2fea7f6c | 408 | return PTR_ERR_OR_ZERO(ret); |
9031fefd | 409 | } |
0d747f65 VF |
410 | |
411 | int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) | |
412 | { | |
413 | struct mm_struct *mm = current->mm; | |
414 | int ret; | |
415 | ||
d8ed45c5 | 416 | if (mmap_write_lock_killable(mm)) |
0d747f65 VF |
417 | return -EINTR; |
418 | ||
419 | ret = aarch32_kuser_helpers_setup(mm); | |
420 | if (ret) | |
421 | goto out; | |
422 | ||
0cbc2659 | 423 | if (IS_ENABLED(CONFIG_COMPAT_VDSO)) { |
2a30aca8 | 424 | ret = __setup_additional_pages(VDSO_ABI_AA32, mm, bprm, |
0cbc2659 WD |
425 | uses_interp); |
426 | if (ret) | |
427 | goto out; | |
428 | } | |
0d747f65 | 429 | |
a39060b0 | 430 | ret = aarch32_sigreturn_setup(mm); |
0d747f65 | 431 | out: |
d8ed45c5 | 432 | mmap_write_unlock(mm); |
0d747f65 VF |
433 | return ret; |
434 | } | |
9031fefd WD |
435 | #endif /* CONFIG_COMPAT */ |
436 | ||
1d09094a MR |
/* Indices into aarch64_vdso_maps[]. */
enum aarch64_map {
	AA64_MAP_VVAR,
	AA64_MAP_VDSO,
};
441 | ||
442 | static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = { | |
443 | [AA64_MAP_VVAR] = { | |
5a9e3e15 | 444 | .name = "[vvar]", |
d53b5c01 | 445 | .fault = vvar_fault, |
5a9e3e15 | 446 | }, |
1d09094a | 447 | [AA64_MAP_VDSO] = { |
5a9e3e15 | 448 | .name = "[vdso]", |
73958695 | 449 | .mremap = vdso_mremap, |
5a9e3e15 JZ |
450 | }, |
451 | }; | |
2fea7f6c | 452 | |
9031fefd WD |
453 | static int __init vdso_init(void) |
454 | { | |
1d09094a MR |
455 | vdso_info[VDSO_ABI_AA64].dm = &aarch64_vdso_maps[AA64_MAP_VVAR]; |
456 | vdso_info[VDSO_ABI_AA64].cm = &aarch64_vdso_maps[AA64_MAP_VDSO]; | |
601255ae | 457 | |
d3418f38 | 458 | return __vdso_init(VDSO_ABI_AA64); |
9031fefd WD |
459 | } |
460 | arch_initcall(vdso_init); | |
461 | ||
2a30aca8 | 462 | int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) |
9031fefd WD |
463 | { |
464 | struct mm_struct *mm = current->mm; | |
c7aa2d71 | 465 | int ret; |
9031fefd | 466 | |
d8ed45c5 | 467 | if (mmap_write_lock_killable(mm)) |
69048176 | 468 | return -EINTR; |
9031fefd | 469 | |
2a30aca8 | 470 | ret = __setup_additional_pages(VDSO_ABI_AA64, mm, bprm, uses_interp); |
d8ed45c5 | 471 | mmap_write_unlock(mm); |
9031fefd | 472 | |
c7aa2d71 | 473 | return ret; |
9031fefd | 474 | } |