Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
1da177e4 | 2 | /* |
1da177e4 LT |
3 | * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds |
4 | * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> | |
5 | * Copyright (C) 2002 Andi Kleen | |
78aa1f66 | 6 | * |
1da177e4 | 7 | * This handles calls from both 32bit and 64bit mode. |
c2b3496b PZ |
8 | * |
9 | * Lock order: | |
10 | * context.ldt_usr_sem | |
11 | * mmap_sem | |
12 | * context.lock | |
1da177e4 LT |
13 | */ |
14 | ||
15 | #include <linux/errno.h> | |
5a0e3ad6 | 16 | #include <linux/gfp.h> |
1da177e4 LT |
17 | #include <linux/sched.h> |
18 | #include <linux/string.h> | |
19 | #include <linux/mm.h> | |
20 | #include <linux/smp.h> | |
da20ab35 | 21 | #include <linux/syscalls.h> |
37868fe1 | 22 | #include <linux/slab.h> |
1da177e4 | 23 | #include <linux/vmalloc.h> |
423a5405 | 24 | #include <linux/uaccess.h> |
1da177e4 | 25 | |
1da177e4 | 26 | #include <asm/ldt.h> |
f55f0501 | 27 | #include <asm/tlb.h> |
1da177e4 | 28 | #include <asm/desc.h> |
70f5088d | 29 | #include <asm/mmu_context.h> |
bbc1f698 | 30 | #include <asm/syscalls.h> |
1da177e4 | 31 | |
/*
 * Reload DS and ES when they currently hold LDT selectors, so that the
 * cached descriptors match the updated LDT.  The body is only compiled
 * in under CONFIG_X86_64; otherwise this function does nothing.
 */
static void refresh_ldt_segments(void)
{
#ifdef CONFIG_X86_64
	unsigned short selector;

	/* DS: reload only if the selector's TI bits say it lives in the LDT. */
	savesegment(ds, selector);
	if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT)
		loadsegment(ds, selector);

	/* ES: same treatment. */
	savesegment(es, selector);
	if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT)
		loadsegment(es, selector);
#endif
}
50 | ||
c2b3496b | 51 | /* context.lock is held by the task which issued the smp function call */ |
3d28ebce | 52 | static void flush_ldt(void *__mm) |
1da177e4 | 53 | { |
3d28ebce | 54 | struct mm_struct *mm = __mm; |
37868fe1 | 55 | |
3d28ebce | 56 | if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm) |
37868fe1 AL |
57 | return; |
58 | ||
f55f0501 | 59 | load_mm_ldt(mm); |
a6323757 AL |
60 | |
61 | refresh_ldt_segments(); | |
1da177e4 | 62 | } |
1da177e4 | 63 | |
37868fe1 | 64 | /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */ |
bbf79d21 | 65 | static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries) |
1da177e4 | 66 | { |
37868fe1 | 67 | struct ldt_struct *new_ldt; |
990e9dc3 | 68 | unsigned int alloc_size; |
37868fe1 | 69 | |
bbf79d21 | 70 | if (num_entries > LDT_ENTRIES) |
37868fe1 AL |
71 | return NULL; |
72 | ||
73 | new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL); | |
74 | if (!new_ldt) | |
75 | return NULL; | |
76 | ||
77 | BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct)); | |
bbf79d21 | 78 | alloc_size = num_entries * LDT_ENTRY_SIZE; |
37868fe1 AL |
79 | |
80 | /* | |
81 | * Xen is very picky: it requires a page-aligned LDT that has no | |
82 | * trailing nonzero bytes in any page that contains LDT descriptors. | |
83 | * Keep it simple: zero the whole allocation and never allocate less | |
84 | * than PAGE_SIZE. | |
85 | */ | |
86 | if (alloc_size > PAGE_SIZE) | |
87 | new_ldt->entries = vzalloc(alloc_size); | |
1da177e4 | 88 | else |
f454b478 | 89 | new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL); |
1da177e4 | 90 | |
37868fe1 AL |
91 | if (!new_ldt->entries) { |
92 | kfree(new_ldt); | |
93 | return NULL; | |
94 | } | |
77e463d1 | 95 | |
f55f0501 AL |
96 | /* The new LDT isn't aliased for PTI yet. */ |
97 | new_ldt->slot = -1; | |
98 | ||
bbf79d21 | 99 | new_ldt->nr_entries = num_entries; |
37868fe1 AL |
100 | return new_ldt; |
101 | } | |
38ffbe66 | 102 | |
/*
 * If PTI is enabled, this maps the LDT into the kernelmode and
 * usermode tables for the given mm.
 *
 * @mm:   address space whose page tables receive the mapping
 * @ldt:  LDT to map; its ->slot is expected to still be -1
 * @slot: LDT slot to map into (callers in this file pass 0 or 1)
 *
 * Returns 0 on success, or when PTI is compiled out or not enabled on this
 * CPU.  Returns -ENOMEM if a PTE could not be obtained; in that case pages
 * mapped by earlier loop iterations are left in place and ->slot is not
 * updated, so the callers in this file clean up with free_ldt_pgtables().
 *
 * There is no corresponding unmap function.  Even if the LDT is freed, we
 * leave the PTEs around until the slot is reused or the mm is destroyed.
 * This is harmless: the LDT is always in ordinary memory, and no one will
 * access the freed slot.
 *
 * If we wanted to unmap freed LDTs, we'd also need to do a flush to make
 * it useful, and the flush would slow down modify_ldt().
 */
static int
map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	bool is_vmalloc, had_top_level_entry;
	unsigned long va;
	spinlock_t *ptl;
	pgd_t *pgd;
	int i;

	if (!static_cpu_has(X86_FEATURE_PTI))
		return 0;

	/*
	 * Any given ldt_struct should have map_ldt_struct() called at most
	 * once.
	 */
	WARN_ON(ldt->slot != -1);

	/*
	 * Did we already have the top level entry allocated?  We can't
	 * use pgd_none() for this because it doesn't do anything on
	 * 4-level page table kernels.
	 */
	pgd = pgd_offset(mm, LDT_BASE_ADDR);
	had_top_level_entry = (pgd->pgd != 0);

	/* Decides below how each backing page is translated to a pfn. */
	is_vmalloc = is_vmalloc_addr(ldt->entries);

	/* One iteration per page that contains LDT descriptors. */
	for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) {
		unsigned long offset = i << PAGE_SHIFT;
		const void *src = (char *)ldt->entries + offset;
		unsigned long pfn;
		pgprot_t pte_prot;
		pte_t pte, *ptep;

		va = (unsigned long)ldt_slot_va(slot) + offset;
		pfn = is_vmalloc ? vmalloc_to_pfn(src) :
			page_to_pfn(virt_to_page(src));
		/*
		 * Treat the PTI LDT range as a *userspace* range.
		 * get_locked_pte() will allocate all needed pagetables
		 * and account for them in this mm.
		 */
		ptep = get_locked_pte(mm, va, &ptl);
		if (!ptep)
			return -ENOMEM;
		/*
		 * Map it RO so the easy to find address is not a primary
		 * target via some kernel interface which misses a
		 * permission check.
		 */
		pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL);
		/* Filter out unsupported __PAGE_KERNEL* bits: */
		pgprot_val(pte_prot) &= __supported_pte_mask;
		pte = pfn_pte(pfn, pte_prot);
		set_pte_at(mm, va, ptep, pte);
		pte_unmap_unlock(ptep, ptl);
	}

	/*
	 * NOTE(review): the static_cpu_has(X86_FEATURE_PTI) tests in both
	 * branches below look redundant, since the function already returned
	 * early above when PTI is off.
	 */
	if (mm->context.ldt) {
		/*
		 * We already had an LDT.  The top-level entry should already
		 * have been allocated and synchronized with the usermode
		 * tables.
		 */
		WARN_ON(!had_top_level_entry);
		if (static_cpu_has(X86_FEATURE_PTI))
			WARN_ON(!kernel_to_user_pgdp(pgd)->pgd);
	} else {
		/*
		 * This is the first time we're mapping an LDT for this process.
		 * Sync the pgd to the usermode tables.
		 */
		WARN_ON(had_top_level_entry);
		if (static_cpu_has(X86_FEATURE_PTI)) {
			WARN_ON(kernel_to_user_pgdp(pgd)->pgd);
			set_pgd(kernel_to_user_pgdp(pgd), *pgd);
		}
	}

	/* Flush the whole slot's range, not just the pages mapped above. */
	va = (unsigned long)ldt_slot_va(slot);
	flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0);

	/* Record the slot only once the mapping is fully established. */
	ldt->slot = slot;
#endif
	return 0;
}
203 | ||
/*
 * Tear down the page tables covering the LDT range of @mm: one
 * PGDIR-sized region starting at LDT_BASE_ADDR.  Does nothing unless
 * CONFIG_PAGE_TABLE_ISOLATION is set and the CPU has X86_FEATURE_PTI.
 */
static void free_ldt_pgtables(struct mm_struct *mm)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	unsigned long range_start = LDT_BASE_ADDR;
	unsigned long range_end = range_start + (1UL << PGDIR_SHIFT);
	struct mmu_gather tlb;

	if (static_cpu_has(X86_FEATURE_PTI)) {
		tlb_gather_mmu(&tlb, mm, range_start, range_end);
		free_pgd_range(&tlb, range_start, range_end,
			       range_start, range_end);
		tlb_finish_mmu(&tlb, range_start, range_end);
	}
#endif
}
219 | ||
/*
 * Hand the LDT's descriptor storage and entry count to
 * paravirt_alloc_ldt().
 *
 * After calling this, the LDT is immutable.
 */
static void finalize_ldt_struct(struct ldt_struct *ldt)
{
	paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
}
225 | ||
/*
 * Publish @ldt as the LDT of @mm and make every CPU in mm_cpumask(@mm)
 * run flush_ldt(), waiting for the handlers to finish.  The whole
 * sequence runs under mm->context.lock.
 */
static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
{
	mutex_lock(&mm->context.lock);

	/* Synchronizes with READ_ONCE in load_mm_ldt. */
	smp_store_release(&mm->context.ldt, ldt);

	/* Activate the LDT for all CPUs using current's mm. */
	on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);

	mutex_unlock(&mm->context.lock);
}
78aa1f66 | 238 | |
37868fe1 AL |
239 | static void free_ldt_struct(struct ldt_struct *ldt) |
240 | { | |
241 | if (likely(!ldt)) | |
242 | return; | |
38ffbe66 | 243 | |
bbf79d21 BP |
244 | paravirt_free_ldt(ldt->entries, ldt->nr_entries); |
245 | if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE) | |
8d5341a6 | 246 | vfree_atomic(ldt->entries); |
37868fe1 | 247 | else |
f454b478 | 248 | free_page((unsigned long)ldt->entries); |
37868fe1 | 249 | kfree(ldt); |
1da177e4 LT |
250 | } |
251 | ||
252 | /* | |
a4828f81 TG |
253 | * Called on fork from arch_dup_mmap(). Just copy the current LDT state, |
254 | * the new task is not running, so nothing can be installed. | |
1da177e4 | 255 | */ |
a4828f81 | 256 | int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm) |
1da177e4 | 257 | { |
37868fe1 | 258 | struct ldt_struct *new_ldt; |
1da177e4 LT |
259 | int retval = 0; |
260 | ||
a4828f81 | 261 | if (!old_mm) |
37868fe1 | 262 | return 0; |
37868fe1 AL |
263 | |
264 | mutex_lock(&old_mm->context.lock); | |
a4828f81 | 265 | if (!old_mm->context.ldt) |
37868fe1 | 266 | goto out_unlock; |
37868fe1 | 267 | |
bbf79d21 | 268 | new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries); |
37868fe1 AL |
269 | if (!new_ldt) { |
270 | retval = -ENOMEM; | |
271 | goto out_unlock; | |
272 | } | |
273 | ||
274 | memcpy(new_ldt->entries, old_mm->context.ldt->entries, | |
bbf79d21 | 275 | new_ldt->nr_entries * LDT_ENTRY_SIZE); |
37868fe1 AL |
276 | finalize_ldt_struct(new_ldt); |
277 | ||
f55f0501 AL |
278 | retval = map_ldt_struct(mm, new_ldt, 0); |
279 | if (retval) { | |
280 | free_ldt_pgtables(mm); | |
281 | free_ldt_struct(new_ldt); | |
282 | goto out_unlock; | |
283 | } | |
37868fe1 AL |
284 | mm->context.ldt = new_ldt; |
285 | ||
286 | out_unlock: | |
287 | mutex_unlock(&old_mm->context.lock); | |
1da177e4 LT |
288 | return retval; |
289 | } | |
290 | ||
/*
 * Free the LDT attached to @mm, if any, and clear the pointer.
 *
 * No need to lock the MM as we are the last user
 *
 * 64bit: Don't touch the LDT register - we're already in the next thread.
 */
void destroy_context_ldt(struct mm_struct *mm)
{
	/* free_ldt_struct() accepts NULL, so no check is needed here. */
	free_ldt_struct(mm->context.ldt);
	mm->context.ldt = NULL;
}
301 | ||
/*
 * Release the page tables backing the LDT range of @mm.  Thin wrapper
 * around free_ldt_pgtables().
 */
void ldt_arch_exit_mmap(struct mm_struct *mm)
{
	free_ldt_pgtables(mm);
}
306 | ||
78aa1f66 | 307 | static int read_ldt(void __user *ptr, unsigned long bytecount) |
1da177e4 | 308 | { |
78aa1f66 | 309 | struct mm_struct *mm = current->mm; |
bbf79d21 BP |
310 | unsigned long entries_size; |
311 | int retval; | |
1da177e4 | 312 | |
c2b3496b | 313 | down_read(&mm->context.ldt_usr_sem); |
37868fe1 AL |
314 | |
315 | if (!mm->context.ldt) { | |
316 | retval = 0; | |
317 | goto out_unlock; | |
318 | } | |
319 | ||
78aa1f66 TG |
320 | if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) |
321 | bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; | |
1da177e4 | 322 | |
bbf79d21 BP |
323 | entries_size = mm->context.ldt->nr_entries * LDT_ENTRY_SIZE; |
324 | if (entries_size > bytecount) | |
325 | entries_size = bytecount; | |
1da177e4 | 326 | |
bbf79d21 | 327 | if (copy_to_user(ptr, mm->context.ldt->entries, entries_size)) { |
37868fe1 AL |
328 | retval = -EFAULT; |
329 | goto out_unlock; | |
330 | } | |
331 | ||
bbf79d21 | 332 | if (entries_size != bytecount) { |
37868fe1 | 333 | /* Zero-fill the rest and pretend we read bytecount bytes. */ |
bbf79d21 | 334 | if (clear_user(ptr + entries_size, bytecount - entries_size)) { |
37868fe1 AL |
335 | retval = -EFAULT; |
336 | goto out_unlock; | |
1da177e4 LT |
337 | } |
338 | } | |
37868fe1 AL |
339 | retval = bytecount; |
340 | ||
341 | out_unlock: | |
c2b3496b | 342 | up_read(&mm->context.ldt_usr_sem); |
37868fe1 | 343 | return retval; |
1da177e4 LT |
344 | } |
345 | ||
78aa1f66 | 346 | static int read_default_ldt(void __user *ptr, unsigned long bytecount) |
1da177e4 | 347 | { |
77e463d1 TG |
348 | /* CHECKME: Can we use _one_ random number ? */ |
349 | #ifdef CONFIG_X86_32 | |
350 | unsigned long size = 5 * sizeof(struct desc_struct); | |
351 | #else | |
352 | unsigned long size = 128; | |
353 | #endif | |
354 | if (bytecount > size) | |
355 | bytecount = size; | |
1da177e4 LT |
356 | if (clear_user(ptr, bytecount)) |
357 | return -EFAULT; | |
78aa1f66 | 358 | return bytecount; |
1da177e4 LT |
359 | } |
360 | ||
/*
 * Install or clear one LDT entry for the current task.
 *
 * @ptr:       user pointer to a struct user_desc describing the entry
 * @bytecount: must equal sizeof(struct user_desc)
 * @oldmode:   nonzero for the legacy call (modify_ldt func 1), zero for
 *             func 0x11
 *
 * A new LDT is always built: the old entries (if any) are copied, the
 * requested entry is written, and the result is installed in place of the
 * old LDT, which is then freed.
 *
 * Returns 0 on success, -EINVAL for a bad size or descriptor, -EFAULT if
 * the descriptor cannot be read, -EINTR if waiting for ldt_usr_sem was
 * interrupted by a fatal signal, -ENOMEM if the new LDT cannot be
 * allocated, or the error reported by map_ldt_struct().
 */
static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
{
	struct mm_struct *mm = current->mm;
	struct ldt_struct *new_ldt, *old_ldt;
	unsigned int old_nr_entries, new_nr_entries;
	struct user_desc ldt_info;
	struct desc_struct ldt;
	int error;

	error = -EINVAL;
	if (bytecount != sizeof(ldt_info))
		goto out;
	error = -EFAULT;
	if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
		goto out;

	error = -EINVAL;
	if (ldt_info.entry_number >= LDT_ENTRIES)
		goto out;
	/* contents == 3 is rejected in oldmode, and unless marked not-present. */
	if (ldt_info.contents == 3) {
		if (oldmode)
			goto out;
		if (ldt_info.seg_not_present == 0)
			goto out;
	}

	if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||
	    LDT_empty(&ldt_info)) {
		/* The user wants to clear the entry. */
		memset(&ldt, 0, sizeof(ldt));
	} else {
		/* Without CONFIG_X86_16BIT, non-32-bit segments are refused. */
		if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
			error = -EINVAL;
			goto out;
		}

		fill_ldt(&ldt, &ldt_info);
		/* The legacy call always clears the AVL bit. */
		if (oldmode)
			ldt.avl = 0;
	}

	/* Nothing is held yet, so returning directly is fine here. */
	if (down_write_killable(&mm->context.ldt_usr_sem))
		return -EINTR;

	/* The new LDT never shrinks: it keeps at least the old entry count. */
	old_ldt = mm->context.ldt;
	old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
	new_nr_entries = max(ldt_info.entry_number + 1, old_nr_entries);

	error = -ENOMEM;
	new_ldt = alloc_ldt_struct(new_nr_entries);
	if (!new_ldt)
		goto out_unlock;

	if (old_ldt)
		memcpy(new_ldt->entries, old_ldt->entries, old_nr_entries * LDT_ENTRY_SIZE);

	new_ldt->entries[ldt_info.entry_number] = ldt;
	finalize_ldt_struct(new_ldt);

	/*
	 * If we are using PTI, map the new LDT into the userspace pagetables.
	 * If there is already an LDT, use the other slot so that other CPUs
	 * will continue to use the old LDT until install_ldt() switches
	 * them over to the new LDT.
	 */
	error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
	if (error) {
		/*
		 * This only can fail for the first LDT setup.  If an LDT is
		 * already installed then the PTE page is already
		 * populated.  Mop up a half populated page table.
		 */
		if (!WARN_ON_ONCE(old_ldt))
			free_ldt_pgtables(mm);
		free_ldt_struct(new_ldt);
		goto out_unlock;
	}

	/* Switch over first, then drop the old LDT. */
	install_ldt(mm, new_ldt);
	free_ldt_struct(old_ldt);
	error = 0;

out_unlock:
	up_write(&mm->context.ldt_usr_sem);
out:
	return error;
}
448 | ||
da20ab35 DH |
449 | SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr , |
450 | unsigned long , bytecount) | |
1da177e4 LT |
451 | { |
452 | int ret = -ENOSYS; | |
453 | ||
454 | switch (func) { | |
455 | case 0: | |
456 | ret = read_ldt(ptr, bytecount); | |
457 | break; | |
458 | case 1: | |
459 | ret = write_ldt(ptr, bytecount, 1); | |
460 | break; | |
461 | case 2: | |
462 | ret = read_default_ldt(ptr, bytecount); | |
463 | break; | |
464 | case 0x11: | |
465 | ret = write_ldt(ptr, bytecount, 0); | |
466 | break; | |
467 | } | |
da20ab35 DH |
468 | /* |
469 | * The SYSCALL_DEFINE() macros give us an 'unsigned long' | |
470 | * return type, but tht ABI for sys_modify_ldt() expects | |
471 | * 'int'. This cast gives us an int-sized value in %rax | |
472 | * for the return code. The 'unsigned' is necessary so | |
473 | * the compiler does not try to sign-extend the negative | |
474 | * return codes into the high half of the register when | |
475 | * taking the value from int->long. | |
476 | */ | |
477 | return (unsigned int)ret; | |
1da177e4 | 478 | } |