Commit | Line | Data |
---|---|---|
a17ae4c3 | 1 | // SPDX-License-Identifier: GPL-2.0 |
b020632e MS |
2 | /* |
3 | * vdso setup for s390 | |
4 | * | |
5 | * Copyright IBM Corp. 2008 | |
6 | * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) | |
b020632e MS |
7 | */ |
8 | ||
3994a52b | 9 | #include <linux/init.h> |
b020632e MS |
10 | #include <linux/errno.h> |
11 | #include <linux/sched.h> | |
12 | #include <linux/kernel.h> | |
13 | #include <linux/mm.h> | |
14 | #include <linux/smp.h> | |
15 | #include <linux/stddef.h> | |
16 | #include <linux/unistd.h> | |
17 | #include <linux/slab.h> | |
18 | #include <linux/user.h> | |
19 | #include <linux/elf.h> | |
20 | #include <linux/security.h> | |
57c8a661 | 21 | #include <linux/memblock.h> |
7757591a | 22 | #include <linux/compat.h> |
cbb870c8 | 23 | #include <asm/asm-offsets.h> |
b020632e | 24 | #include <asm/pgtable.h> |
b020632e MS |
25 | #include <asm/processor.h> |
26 | #include <asm/mmu.h> | |
27 | #include <asm/mmu_context.h> | |
28 | #include <asm/sections.h> | |
29 | #include <asm/vdso.h> | |
a0616cde | 30 | #include <asm/facility.h> |
b020632e | 31 | |
#ifdef CONFIG_COMPAT_VDSO
/*
 * 32 bit (compat) vDSO image. vdso32_start/vdso32_end bound the image
 * and are defined outside this file; the page count and the page list
 * are filled in by vdso_init().
 */
extern char vdso32_start, vdso32_end;
static void *vdso32_kbase = &vdso32_start;
static unsigned int vdso32_pages;
static struct page **vdso32_pagelist;
#endif
38 | ||
/*
 * 64 bit vDSO image. vdso64_start/vdso64_end bound the image and are
 * defined outside this file; the page count and the page list are
 * filled in by vdso_init().
 */
extern char vdso64_start, vdso64_end;
static void *vdso64_kbase = &vdso64_start;
static unsigned int vdso64_pages;
static struct page **vdso64_pagelist;
b020632e MS |
43 | |
44 | /* | |
45 | * Should the kernel map a VDSO page into processes and pass its | |
46 | * address down to glibc upon exec()? | |
47 | */ | |
48 | unsigned int __read_mostly vdso_enabled = 1; | |
49 | ||
ef4b891f | 50 | static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, |
35bb092a MS |
51 | struct vm_area_struct *vma, struct vm_fault *vmf) |
52 | { | |
53 | struct page **vdso_pagelist; | |
54 | unsigned long vdso_pages; | |
55 | ||
56 | vdso_pagelist = vdso64_pagelist; | |
57 | vdso_pages = vdso64_pages; | |
96ca7674 | 58 | #ifdef CONFIG_COMPAT_VDSO |
d1befa65 | 59 | if (vma->vm_mm->context.compat_mm) { |
35bb092a MS |
60 | vdso_pagelist = vdso32_pagelist; |
61 | vdso_pages = vdso32_pages; | |
62 | } | |
63 | #endif | |
64 | ||
65 | if (vmf->pgoff >= vdso_pages) | |
66 | return VM_FAULT_SIGBUS; | |
67 | ||
68 | vmf->page = vdso_pagelist[vmf->pgoff]; | |
69 | get_page(vmf->page); | |
70 | return 0; | |
71 | } | |
72 | ||
73 | static int vdso_mremap(const struct vm_special_mapping *sm, | |
74 | struct vm_area_struct *vma) | |
75 | { | |
76 | unsigned long vdso_pages; | |
77 | ||
78 | vdso_pages = vdso64_pages; | |
96ca7674 | 79 | #ifdef CONFIG_COMPAT_VDSO |
d1befa65 | 80 | if (vma->vm_mm->context.compat_mm) |
35bb092a MS |
81 | vdso_pages = vdso32_pages; |
82 | #endif | |
83 | ||
84 | if ((vdso_pages << PAGE_SHIFT) != vma->vm_end - vma->vm_start) | |
85 | return -EINVAL; | |
86 | ||
87 | if (WARN_ON_ONCE(current->mm != vma->vm_mm)) | |
88 | return -EFAULT; | |
89 | ||
90 | current->mm->context.vdso_base = vma->vm_start; | |
91 | return 0; | |
92 | } | |
93 | ||
94 | static const struct vm_special_mapping vdso_mapping = { | |
95 | .name = "[vdso]", | |
96 | .fault = vdso_fault, | |
97 | .mremap = vdso_mremap, | |
98 | }; | |
99 | ||
b020632e MS |
100 | static int __init vdso_setup(char *s) |
101 | { | |
7aa79f94 MS |
102 | unsigned long val; |
103 | int rc; | |
104 | ||
105 | rc = 0; | |
106 | if (strncmp(s, "on", 3) == 0) | |
107 | vdso_enabled = 1; | |
108 | else if (strncmp(s, "off", 4) == 0) | |
109 | vdso_enabled = 0; | |
110 | else { | |
958d9072 | 111 | rc = kstrtoul(s, 0, &val); |
7aa79f94 MS |
112 | vdso_enabled = rc ? 0 : !!val; |
113 | } | |
114 | return !rc; | |
b020632e MS |
115 | } |
116 | __setup("vdso=", vdso_setup); | |
117 | ||
118 | /* | |
119 | * The vdso data page | |
120 | */ | |
121 | static union { | |
122 | struct vdso_data data; | |
123 | u8 page[PAGE_SIZE]; | |
abe1ee3a | 124 | } vdso_data_store __page_aligned_data; |
b020632e MS |
125 | struct vdso_data *vdso_data = &vdso_data_store.data; |
126 | ||
c742b31c MS |
127 | /* |
128 | * Setup vdso data page. | |
129 | */ | |
249c543b | 130 | static void __init vdso_init_data(struct vdso_data *vd) |
c742b31c | 131 | { |
e258d719 | 132 | vd->ectg_available = test_facility(31); |
c742b31c MS |
133 | } |
134 | ||
/*
 * Allocate/free per cpu vdso data.
 *
 * SEGMENT_ORDER is the page allocation order used for the per-cpu vdso
 * segment table, both when allocating and when freeing it.
 */
#define SEGMENT_ORDER	2
c742b31c | 139 | |
608796ff MS |
140 | /* |
141 | * The initial vdso_data structure for the boot CPU. Eventually | |
142 | * it is replaced with a properly allocated structure in vdso_init. | |
143 | * This is necessary because a valid S390_lowcore.vdso_per_cpu_data | |
144 | * pointer is required to be able to return from an interrupt or | |
145 | * program check. See the exit paths in entry.S. | |
146 | */ | |
147 | struct vdso_data boot_vdso_data __initdata; | |
148 | ||
149 | void __init vdso_alloc_boot_cpu(struct lowcore *lowcore) | |
150 | { | |
151 | lowcore->vdso_per_cpu_data = (unsigned long) &boot_vdso_data; | |
152 | } | |
153 | ||
c667aeac | 154 | int vdso_alloc_per_cpu(struct lowcore *lowcore) |
c742b31c MS |
155 | { |
156 | unsigned long segment_table, page_table, page_frame; | |
249c543b | 157 | struct vdso_per_cpu_data *vd; |
c742b31c MS |
158 | |
159 | segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); | |
0aaba41b | 160 | page_table = get_zeroed_page(GFP_KERNEL); |
c742b31c MS |
161 | page_frame = get_zeroed_page(GFP_KERNEL); |
162 | if (!segment_table || !page_table || !page_frame) | |
163 | goto out; | |
c9b5ad54 MS |
164 | arch_set_page_dat(virt_to_page(segment_table), SEGMENT_ORDER); |
165 | arch_set_page_dat(virt_to_page(page_table), 0); | |
c742b31c | 166 | |
249c543b MS |
167 | /* Initialize per-cpu vdso data page */ |
168 | vd = (struct vdso_per_cpu_data *) page_frame; | |
169 | vd->cpu_nr = lowcore->cpu_nr; | |
170 | vd->node_id = cpu_to_node(vd->cpu_nr); | |
171 | ||
0aaba41b | 172 | /* Set up page table for the vdso address space */ |
41879ff6 HC |
173 | memset64((u64 *)segment_table, _SEGMENT_ENTRY_EMPTY, _CRST_ENTRIES); |
174 | memset64((u64 *)page_table, _PAGE_INVALID, PTRS_PER_PTE); | |
c742b31c MS |
175 | |
176 | *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; | |
e5098611 | 177 | *(unsigned long *) page_table = _PAGE_PROTECT + page_frame; |
c742b31c | 178 | |
0aaba41b | 179 | lowcore->vdso_asce = segment_table + |
c742b31c | 180 | _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT; |
c742b31c MS |
181 | lowcore->vdso_per_cpu_data = page_frame; |
182 | ||
c742b31c MS |
183 | return 0; |
184 | ||
185 | out: | |
186 | free_page(page_frame); | |
187 | free_page(page_table); | |
188 | free_pages(segment_table, SEGMENT_ORDER); | |
189 | return -ENOMEM; | |
190 | } | |
191 | ||
c667aeac | 192 | void vdso_free_per_cpu(struct lowcore *lowcore) |
c742b31c MS |
193 | { |
194 | unsigned long segment_table, page_table, page_frame; | |
c742b31c | 195 | |
0aaba41b | 196 | segment_table = lowcore->vdso_asce & PAGE_MASK; |
c742b31c MS |
197 | page_table = *(unsigned long *) segment_table; |
198 | page_frame = *(unsigned long *) page_table; | |
199 | ||
200 | free_page(page_frame); | |
201 | free_page(page_table); | |
202 | free_pages(segment_table, SEGMENT_ORDER); | |
203 | } | |
c742b31c | 204 | |
b020632e MS |
205 | /* |
206 | * This is called from binfmt_elf, we create the special vma for the | |
207 | * vDSO and insert it into the mm struct tree | |
208 | */ | |
209 | int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) | |
210 | { | |
211 | struct mm_struct *mm = current->mm; | |
35bb092a | 212 | struct vm_area_struct *vma; |
b020632e MS |
213 | unsigned long vdso_pages; |
214 | unsigned long vdso_base; | |
215 | int rc; | |
216 | ||
217 | if (!vdso_enabled) | |
218 | return 0; | |
b020632e | 219 | |
b020632e | 220 | vdso_pages = vdso64_pages; |
96ca7674 | 221 | #ifdef CONFIG_COMPAT_VDSO |
190f056f VG |
222 | mm->context.compat_mm = is_compat_task(); |
223 | if (mm->context.compat_mm) | |
b020632e | 224 | vdso_pages = vdso32_pages; |
b020632e | 225 | #endif |
b020632e MS |
226 | /* |
227 | * vDSO has a problem and was disabled, just don't "enable" it for | |
228 | * the process | |
229 | */ | |
230 | if (vdso_pages == 0) | |
231 | return 0; | |
232 | ||
b020632e MS |
233 | /* |
234 | * pick a base address for the vDSO in process space. We try to put | |
235 | * it at vdso_base which is the "natural" base for it, but we might | |
236 | * fail and end up putting it elsewhere. | |
237 | */ | |
69048176 MH |
238 | if (down_write_killable(&mm->mmap_sem)) |
239 | return -EINTR; | |
e7828bbd | 240 | vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0); |
b020632e MS |
241 | if (IS_ERR_VALUE(vdso_base)) { |
242 | rc = vdso_base; | |
243 | goto out_up; | |
244 | } | |
245 | ||
246 | /* | |
247 | * our vma flags don't have VM_WRITE so by default, the process | |
248 | * isn't allowed to write those pages. | |
249 | * gdb can break that with ptrace interface, and thus trigger COW | |
250 | * on those pages but it's then your responsibility to never do that | |
251 | * on the "data" page of the vDSO or you'll stop getting kernel | |
252 | * updates and your nice userland gettimeofday will be totally dead. | |
253 | * It's fine to use that for setting breakpoints in the vDSO code | |
909af768 | 254 | * pages though. |
b020632e | 255 | */ |
35bb092a MS |
256 | vma = _install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT, |
257 | VM_READ|VM_EXEC| | |
258 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, | |
259 | &vdso_mapping); | |
260 | if (IS_ERR(vma)) { | |
261 | rc = PTR_ERR(vma); | |
262 | goto out_up; | |
263 | } | |
264 | ||
265 | current->mm->context.vdso_base = vdso_base; | |
266 | rc = 0; | |
267 | ||
b020632e MS |
268 | out_up: |
269 | up_write(&mm->mmap_sem); | |
270 | return rc; | |
271 | } | |
272 | ||
b020632e MS |
273 | static int __init vdso_init(void) |
274 | { | |
275 | int i; | |
276 | ||
c742b31c | 277 | vdso_init_data(vdso_data); |
96ca7674 | 278 | #ifdef CONFIG_COMPAT_VDSO |
b020632e MS |
279 | /* Calculate the size of the 32 bit vDSO */ |
280 | vdso32_pages = ((&vdso32_end - &vdso32_start | |
281 | + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; | |
282 | ||
283 | /* Make sure pages are in the correct state */ | |
6396bb22 | 284 | vdso32_pagelist = kcalloc(vdso32_pages + 1, sizeof(struct page *), |
b020632e MS |
285 | GFP_KERNEL); |
286 | BUG_ON(vdso32_pagelist == NULL); | |
287 | for (i = 0; i < vdso32_pages - 1; i++) { | |
288 | struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); | |
b020632e MS |
289 | get_page(pg); |
290 | vdso32_pagelist[i] = pg; | |
291 | } | |
292 | vdso32_pagelist[vdso32_pages - 1] = virt_to_page(vdso_data); | |
293 | vdso32_pagelist[vdso32_pages] = NULL; | |
294 | #endif | |
295 | ||
b020632e MS |
296 | /* Calculate the size of the 64 bit vDSO */ |
297 | vdso64_pages = ((&vdso64_end - &vdso64_start | |
298 | + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; | |
299 | ||
300 | /* Make sure pages are in the correct state */ | |
6396bb22 | 301 | vdso64_pagelist = kcalloc(vdso64_pages + 1, sizeof(struct page *), |
b020632e MS |
302 | GFP_KERNEL); |
303 | BUG_ON(vdso64_pagelist == NULL); | |
304 | for (i = 0; i < vdso64_pages - 1; i++) { | |
305 | struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE); | |
b020632e MS |
306 | get_page(pg); |
307 | vdso64_pagelist[i] = pg; | |
308 | } | |
309 | vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); | |
310 | vdso64_pagelist[vdso64_pages] = NULL; | |
8b646bd7 | 311 | if (vdso_alloc_per_cpu(&S390_lowcore)) |
81ffa041 | 312 | BUG(); |
b020632e MS |
313 | |
314 | get_page(virt_to_page(vdso_data)); | |
315 | ||
b020632e MS |
316 | return 0; |
317 | } | |
8b646bd7 | 318 | early_initcall(vdso_init); |