/*
 * Copyright 2007 Andi Kleen, SUSE Labs.
 * Subject to the GPL, v.2
 *
 * This contains most of the x86 vDSO kernel-side code.
 */
7 | #include <linux/mm.h> | |
4e950f6f | 8 | #include <linux/err.h> |
2aae950b | 9 | #include <linux/sched.h> |
5a0e3ad6 | 10 | #include <linux/slab.h> |
2aae950b AK |
11 | #include <linux/init.h> |
12 | #include <linux/random.h> | |
3fa89ca7 | 13 | #include <linux/elf.h> |
d4f829dd | 14 | #include <linux/cpu.h> |
2aae950b AK |
15 | #include <asm/vgtod.h> |
16 | #include <asm/proto.h> | |
7f3646aa | 17 | #include <asm/vdso.h> |
1c0c1b93 | 18 | #include <asm/vvar.h> |
aafade24 | 19 | #include <asm/page.h> |
18d0a6fd | 20 | #include <asm/hpet.h> |
d4f829dd | 21 | #include <asm/desc.h> |
2aae950b | 22 | |
#if defined(CONFIG_X86_64)
/*
 * "vdso=" boot option for 64-bit kernels: non-zero (the default) maps
 * the 64-bit/x32 vDSO into new processes, 0 disables it.  Set from
 * vdso_setup() and checked in arch_setup_additional_pages().
 */
unsigned int __read_mostly vdso64_enabled = 1;
#endif
1a21d4e0 | 26 | |
6f121e54 | 27 | void __init init_vdso_image(const struct vdso_image *image) |
1a21d4e0 | 28 | { |
1a21d4e0 | 29 | int i; |
6f121e54 | 30 | int npages = (image->size) / PAGE_SIZE; |
1a21d4e0 | 31 | |
6f121e54 AL |
32 | BUG_ON(image->size % PAGE_SIZE != 0); |
33 | for (i = 0; i < npages; i++) | |
a62c34bd AL |
34 | image->text_mapping.pages[i] = |
35 | virt_to_page(image->data + i*PAGE_SIZE); | |
1a21d4e0 | 36 | |
6f121e54 AL |
37 | apply_alternatives((struct alt_instr *)(image->data + image->alt), |
38 | (struct alt_instr *)(image->data + image->alt + | |
39 | image->alt_len)); | |
1a21d4e0 | 40 | } |
1b3f2a72 | 41 | |
2aae950b AK |
42 | struct linux_binprm; |
43 | ||
394f56fe AL |
44 | /* |
45 | * Put the vdso above the (randomized) stack with another randomized | |
46 | * offset. This way there is no hole in the middle of address space. | |
47 | * To save memory make sure it is still in the same PTE as the stack | |
48 | * top. This doesn't give that many random bits. | |
49 | * | |
50 | * Note that this algorithm is imperfect: the distribution of the vdso | |
51 | * start address within a PMD is biased toward the end. | |
52 | * | |
53 | * Only used for the 64-bit and x32 vdsos. | |
54 | */ | |
2aae950b AK |
55 | static unsigned long vdso_addr(unsigned long start, unsigned len) |
56 | { | |
d093601b JB |
57 | #ifdef CONFIG_X86_32 |
58 | return 0; | |
59 | #else | |
2aae950b AK |
60 | unsigned long addr, end; |
61 | unsigned offset; | |
394f56fe AL |
62 | |
63 | /* | |
64 | * Round up the start address. It can start out unaligned as a result | |
65 | * of stack start randomization. | |
66 | */ | |
67 | start = PAGE_ALIGN(start); | |
68 | ||
69 | /* Round the lowest possible end address up to a PMD boundary. */ | |
70 | end = (start + len + PMD_SIZE - 1) & PMD_MASK; | |
d9517346 IM |
71 | if (end >= TASK_SIZE_MAX) |
72 | end = TASK_SIZE_MAX; | |
2aae950b | 73 | end -= len; |
394f56fe AL |
74 | |
75 | if (end > start) { | |
76 | offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1); | |
77 | addr = start + (offset << PAGE_SHIFT); | |
78 | } else { | |
79 | addr = start; | |
80 | } | |
dfb09f9b BP |
81 | |
82 | /* | |
394f56fe AL |
83 | * Forcibly align the final address in case we have a hardware |
84 | * issue that requires alignment for performance reasons. | |
dfb09f9b | 85 | */ |
f9902472 | 86 | addr = align_vdso_addr(addr); |
dfb09f9b | 87 | |
2aae950b | 88 | return addr; |
d093601b | 89 | #endif |
2aae950b AK |
90 | } |
91 | ||
/*
 * Map a vDSO image into the current process.
 *
 * @image:          which vDSO flavor to map.
 * @calculate_addr: if true, pick a randomized address near the stack via
 *                  vdso_addr(); if false, pass 0 and let
 *                  get_unmapped_area() choose freely.
 *
 * image->sym_vvar_start is negative: the vvar area lies directly below
 * the vDSO text, so one contiguous region of
 * image->size - image->sym_vvar_start bytes is reserved and split into
 * the "[vvar]" mapping at the bottom and the text mapping above it.
 *
 * Returns 0 on success or a negative error; on failure
 * mm->context.vdso is reset to NULL.
 *
 * NOTE(review): on failure after an _install_special_mapping() has
 * succeeded, the partially installed mapping(s) are left in place —
 * only context.vdso is cleared.  Confirm this is acceptable for the
 * callers (exec-time setup, where the mm is torn down on failure).
 */
static int map_vdso(const struct vdso_image *image, bool calculate_addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long addr, text_start;
	int ret = 0;
	/*
	 * The vvar area has no backing struct pages of its own; its
	 * contents are provided by the remap_pfn_range() calls below.
	 */
	static struct page *no_pages[] = {NULL};
	static struct vm_special_mapping vvar_mapping = {
		.name = "[vvar]",
		.pages = no_pages,
	};

	if (calculate_addr) {
		addr = vdso_addr(current->mm->start_stack,
				 image->size - image->sym_vvar_start);
	} else {
		addr = 0;
	}

	down_write(&mm->mmap_sem);

	/* Reserve one region covering both the vvar area and the text. */
	addr = get_unmapped_area(NULL, addr,
				 image->size - image->sym_vvar_start, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	/* sym_vvar_start is negative, so the text starts above the vvar area. */
	text_start = addr - image->sym_vvar_start;
	current->mm->context.vdso = (void __user *)text_start;

	/*
	 * MAYWRITE to allow gdb to COW and set breakpoints
	 */
	vma = _install_special_mapping(mm,
				       text_start,
				       image->size,
				       VM_READ|VM_EXEC|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       &image->text_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	/* The vvar area below the text: read-only, not even MAYWRITE. */
	vma = _install_special_mapping(mm,
				       addr,
				       -image->sym_vvar_start,
				       VM_READ|VM_MAYREAD,
				       &vvar_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	/* Expose the kernel's __vvar_page at the image's vvar symbol, if any. */
	if (image->sym_vvar_page)
		ret = remap_pfn_range(vma,
				      text_start + image->sym_vvar_page,
				      __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
				      PAGE_SIZE,
				      PAGE_READONLY);

	if (ret)
		goto up_fail;

#ifdef CONFIG_HPET_TIMER
	/* Map the HPET MMIO page read-only and uncached, if present. */
	if (hpet_address && image->sym_hpet_page) {
		ret = io_remap_pfn_range(vma,
					 text_start + image->sym_hpet_page,
					 hpet_address >> PAGE_SHIFT,
					 PAGE_SIZE,
					 pgprot_noncached(PAGE_READONLY));

		if (ret)
			goto up_fail;
	}
#endif

up_fail:
	if (ret)
		current->mm->context.vdso = NULL;

	up_write(&mm->mmap_sem);
	return ret;
}
179 | ||
18d0a6fd AL |
#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
/*
 * Map the currently selected 32-bit vDSO image and, if that image
 * exports a SYSENTER return landing pad, record its user address in
 * the thread info for the sysenter path.  Honors the vdso32= setting.
 */
static int load_vdso32(void)
{
	int err;

	/* Every value other than 1 means "disabled". */
	if (vdso32_enabled != 1)
		return 0;

	err = map_vdso(selected_vdso32, false);
	if (err)
		return err;

	if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN) {
		current_thread_info()->sysenter_return =
			current->mm->context.vdso +
			selected_vdso32->sym_VDSO32_SYSENTER_RETURN;
	}

	return 0;
}
#endif
200 | ||
#ifdef CONFIG_X86_64
/*
 * exec()-time hook: map the 64-bit vDSO at a randomized address,
 * unless it was disabled with the vdso= boot option.
 */
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	return vdso64_enabled ? map_vdso(&vdso_image_64, true) : 0;
}

#ifdef CONFIG_COMPAT
/*
 * Compat exec()-time hook: x32 tasks get the x32 vDSO (still subject
 * to vdso64_enabled); everything else falls back to the 32-bit vDSO.
 */
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
				       int uses_interp)
{
#ifdef CONFIG_X86_X32_ABI
	if (test_thread_flag(TIF_X32))
		return vdso64_enabled ? map_vdso(&vdso_image_x32, true) : 0;
#endif

	return load_vdso32();
}
#endif
#else
/* Native 32-bit kernels only ever map the 32-bit vDSO. */
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	return load_vdso32();
}
#endif
232 | ||
#ifdef CONFIG_X86_64
/*
 * Parse the "vdso=" boot option on 64-bit kernels.
 *
 * Must return 1 to mark the option as handled: a __setup() handler
 * that returns 0 causes "vdso=..." to be treated as an unrecognized
 * parameter and passed on to init in its argument/environment.
 */
static __init int vdso_setup(char *s)
{
	vdso64_enabled = simple_strtoul(s, NULL, 0);
	return 1;
}
__setup("vdso=", vdso_setup);
#endif
d4f829dd AL |
241 | |
242 | #ifdef CONFIG_X86_64 | |
/*
 * Per-CPU setup for vgetcpu, run on the target CPU: publish the cpu
 * and node numbers where user space can read them cheaply — in
 * TSC_AUX (for RDTSCP) and in the limit field of the per-cpu GDT
 * segment descriptor.
 */
static void vgetcpu_cpu_init(void *arg)
{
	int cpu = smp_processor_id();
	struct desc_struct d = { };
	unsigned long node = 0;
#ifdef CONFIG_NUMA
	node = cpu_to_node(cpu);
#endif
	/* If RDTSCP is available, encode (node << 12) | cpu in TSC_AUX. */
	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
		write_rdtscp_aux((node << 12) | cpu);

	/*
	 * Store cpu number in limit so that it can be loaded
	 * quickly in user space in vgetcpu. (12 bits for the CPU
	 * and 8 bits for the node)
	 */
	d.limit0 = cpu | ((node & 0xf) << 12);	/* limit bits 0-15: cpu + node low nibble */
	d.limit = node >> 4;			/* limit bits 16-19: node high nibble */
	d.type = 5;	/* RO data, expand down, accessed */
	d.dpl = 3;	/* Visible to user code */
	d.s = 1;	/* Not a system segment */
	d.p = 1;	/* Present */
	d.d = 1;	/* 32-bit */

	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}
269 | ||
d4f829dd | 270 | static int |
1c0c1b93 | 271 | vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg) |
d4f829dd AL |
272 | { |
273 | long cpu = (long)arg; | |
274 | ||
275 | if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) | |
1c0c1b93 | 276 | smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1); |
d4f829dd AL |
277 | |
278 | return NOTIFY_DONE; | |
279 | } | |
280 | ||
/*
 * One-time vDSO initialization on 64-bit kernels: prepare the 64-bit
 * (and, if configured, x32) images, run the vgetcpu per-cpu setup on
 * all present CPUs, and register a hotplug notifier so CPUs that come
 * online later are set up too.  Always returns 0.
 */
static int __init init_vdso(void)
{
	init_vdso_image(&vdso_image_64);

#ifdef CONFIG_X86_X32_ABI
	init_vdso_image(&vdso_image_x32);
#endif

	/* Hold the hotplug lock while initializing and registering. */
	cpu_notifier_register_begin();

	on_each_cpu(vgetcpu_cpu_init, NULL, 1);
	/* notifier priority > KVM */
	__hotcpu_notifier(vgetcpu_cpu_notifier, 30);

	cpu_notifier_register_done();

	return 0;
}
subsys_initcall(init_vdso);
#endif /* CONFIG_X86_64 */