Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
1da177e4 | 2 | /* |
1da177e4 LT |
3 | * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds |
4 | * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> | |
5 | * Copyright (C) 2002 Andi Kleen | |
78aa1f66 | 6 | * |
1da177e4 | 7 | * This handles calls from both 32bit and 64bit mode. |
c2b3496b PZ |
8 | * |
9 | * Lock order: | |
10 | * context.ldt_usr_sem | |
c1e8d7c6 | 11 | * mmap_lock |
c2b3496b | 12 | * context.lock |
1da177e4 LT |
13 | */ |
14 | ||
15 | #include <linux/errno.h> | |
5a0e3ad6 | 16 | #include <linux/gfp.h> |
1da177e4 LT |
17 | #include <linux/sched.h> |
18 | #include <linux/string.h> | |
19 | #include <linux/mm.h> | |
20 | #include <linux/smp.h> | |
da20ab35 | 21 | #include <linux/syscalls.h> |
37868fe1 | 22 | #include <linux/slab.h> |
1da177e4 | 23 | #include <linux/vmalloc.h> |
423a5405 | 24 | #include <linux/uaccess.h> |
1da177e4 | 25 | |
1da177e4 | 26 | #include <asm/ldt.h> |
f55f0501 | 27 | #include <asm/tlb.h> |
1da177e4 | 28 | #include <asm/desc.h> |
70f5088d | 29 | #include <asm/mmu_context.h> |
186525bd IM |
30 | #include <asm/pgtable_areas.h> |
31 | ||
cc801833 AL |
32 | #include <xen/xen.h> |
33 | ||
186525bd IM |
34 | /* This is a multiple of PAGE_SIZE. */ |
35 | #define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE) | |
36 | ||
37 | static inline void *ldt_slot_va(int slot) | |
38 | { | |
39 | return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); | |
40 | } | |
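As a point of reference (constants from arch/x86/include/asm/ldt.h, not from this file): LDT_ENTRIES is 8192 and LDT_ENTRY_SIZE is 8 bytes, so LDT_SLOT_STRIDE works out to 64 KiB and the two PTI alias slots sit back to back above LDT_BASE_ADDR. A minimal stand-alone sketch of the arithmetic; the base address used here is made up for illustration:

```c
#include <stdio.h>

#define LDT_ENTRIES	8192
#define LDT_ENTRY_SIZE	8
#define LDT_SLOT_STRIDE	(LDT_ENTRIES * LDT_ENTRY_SIZE)	/* 0x10000 = 64 KiB */

int main(void)
{
	/* Stand-in for the fixed kernel VA LDT_BASE_ADDR; value is illustrative. */
	unsigned long ldt_base_addr = 0xffff880000000000UL;

	for (int slot = 0; slot <= 1; slot++)
		printf("slot %d -> %#lx\n", slot,
		       ldt_base_addr + (unsigned long)LDT_SLOT_STRIDE * slot);
	return 0;
}
```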
41 | ||
42 | void load_mm_ldt(struct mm_struct *mm) | |
43 | { | |
44 | struct ldt_struct *ldt; | |
45 | ||
46 | /* READ_ONCE synchronizes with smp_store_release */ | |
47 | ldt = READ_ONCE(mm->context.ldt); | |
48 | ||
49 | /* | |
50 | * Any change to mm->context.ldt is followed by an IPI to all | |
51 | * CPUs with the mm active. The LDT will not be freed until | |
52 | * after the IPI is handled by all such CPUs. This means that, | |
53 | * if the ldt_struct changes before we return, the values we see | |
54 | * will be safe, and the new values will be loaded before we run | |
55 | * any user code. | |
56 | * | |
57 | * NB: don't try to convert this to use RCU without extreme care. | |
58 | * We would still need IRQs off, because we don't want to change | |
59 | * the local LDT after an IPI loaded a newer value than the one | |
60 | * that we can see. | |
61 | */ | |
62 | ||
63 | if (unlikely(ldt)) { | |
64 | if (static_cpu_has(X86_FEATURE_PTI)) { | |
65 | if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) { | |
66 | /* | |
67 | * Whoops -- either the new LDT isn't mapped | |
68 | * (if slot == -1) or is mapped into a bogus | |
69 | * slot (if slot > 1). | |
70 | */ | |
71 | clear_LDT(); | |
72 | return; | |
73 | } | |
74 | ||
75 | /* | |
76 | * If page table isolation is enabled, ldt->entries | |
77 | * will not be mapped in the userspace pagetables. | |
78 | * Tell the CPU to access the LDT through the alias | |
79 | * at ldt_slot_va(ldt->slot). | |
80 | */ | |
81 | set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries); | |
82 | } else { | |
83 | set_ldt(ldt->entries, ldt->nr_entries); | |
84 | } | |
85 | } else { | |
86 | clear_LDT(); | |
87 | } | |
88 | } | |
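The READ_ONCE() above pairs with the smp_store_release() in install_ldt() further down: once a reader sees the new pointer, it is guaranteed to see the fully initialized ldt_struct behind it. A minimal user-space analogue of that publication pattern, using C11 atomics and illustrative names rather than any kernel API:

```c
#include <stdatomic.h>
#include <stdlib.h>

struct ldt_like {
	int nr_entries;
	/* ... descriptor storage would follow ... */
};

static _Atomic(struct ldt_like *) published;

/* Writer side (cf. install_ldt): initialize the object completely, then
 * publish it with a release store, which orders all prior writes before
 * the pointer becomes visible to readers. */
static void publish(int nr)
{
	struct ldt_like *new_ldt = malloc(sizeof(*new_ldt));

	if (!new_ldt)
		abort();
	new_ldt->nr_entries = nr;
	atomic_store_explicit(&published, new_ldt, memory_order_release);
}

/* Reader side (cf. load_mm_ldt): an acquire load guarantees that if the
 * new pointer is observed, the initialization behind it is observed too. */
static struct ldt_like *snapshot(void)
{
	return atomic_load_explicit(&published, memory_order_acquire);
}
```

The kernel version additionally relies on the IPI and IRQs-off reasoning in the comment above; the sketch only covers the memory-ordering half.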
89 | ||
90 | void switch_ldt(struct mm_struct *prev, struct mm_struct *next) | |
91 | { | |
92 | /* | |
93 | * Load the LDT if either the old or new mm had an LDT. | |
94 | * | |
95 | * An mm will never go from having an LDT to not having an LDT. Two | |
96 | * mms never share an LDT, so we don't gain anything by checking to | |
97 | * see whether the LDT changed. There's also no guarantee that | |
98 | * prev->context.ldt actually matches LDTR, but, if LDTR is non-NULL, | |
99 | * then prev->context.ldt will also be non-NULL. | |
100 | * | |
101 | * If we really cared, we could optimize the case where prev == next | |
102 | * and we're exiting lazy mode. Most of the time, if this happens, | |
103 | * we don't actually need to reload LDTR, but modify_ldt() is mostly | |
104 | * used by legacy code and emulators where we don't need this level of | |
105 | * performance. | |
106 | * | |
107 | * This uses | instead of || because it generates better code. | |
108 | */ | |
109 | if (unlikely((unsigned long)prev->context.ldt | | |
110 | (unsigned long)next->context.ldt)) | |
111 | load_mm_ldt(next); | |
112 | ||
113 | DEBUG_LOCKS_WARN_ON(preemptible()); | |
114 | } | |
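On the `|`-versus-`||` remark above: `||` must short-circuit, which typically compiles to a conditional branch between the two tests, while `|` evaluates both side-effect-free operands unconditionally and feeds a single test. A stand-alone illustration of the idiom:

```c
/* With ||, the compiler may not evaluate b when a is non-NULL, so it
 * usually emits: test a; branch; test b. */
int either_shortcircuit(void *a, void *b)
{
	return a || b;
}

/* Casting to unsigned long and using bitwise | lets both values be read
 * unconditionally, ORed, and tested once, with no extra branch. This is
 * only valid because reading the two pointers has no side effects. */
int either_branchless(void *a, void *b)
{
	return ((unsigned long)a | (unsigned long)b) != 0;
}
```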
1da177e4 | 115 | |
a6323757 AL |
116 | static void refresh_ldt_segments(void) |
117 | { | |
118 | #ifdef CONFIG_X86_64 | |
119 | unsigned short sel; | |
120 | ||
121 | /* | |
122 | * Make sure that the cached DS and ES descriptors match the updated | |
123 | * LDT. | |
124 | */ | |
125 | savesegment(ds, sel); | |
126 | if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT) | |
127 | loadsegment(ds, sel); | |
128 | ||
129 | savesegment(es, sel); | |
130 | if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT) | |
131 | loadsegment(es, sel); | |
132 | #endif | |
133 | } | |
134 | ||
c2b3496b | 135 | /* context.lock is held by the task which issued the smp function call */ |
3d28ebce | 136 | static void flush_ldt(void *__mm) |
1da177e4 | 137 | { |
3d28ebce | 138 | struct mm_struct *mm = __mm; |
37868fe1 | 139 | |
3d28ebce | 140 | if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm) |
37868fe1 AL |
141 | return; |
142 | ||
f55f0501 | 143 | load_mm_ldt(mm); |
a6323757 AL |
144 | |
145 | refresh_ldt_segments(); | |
1da177e4 | 146 | } |
1da177e4 | 147 | |
37868fe1 | 148 | /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */ |
bbf79d21 | 149 | static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries) |
1da177e4 | 150 | { |
37868fe1 | 151 | struct ldt_struct *new_ldt; |
990e9dc3 | 152 | unsigned int alloc_size; |
37868fe1 | 153 | |
bbf79d21 | 154 | if (num_entries > LDT_ENTRIES) |
37868fe1 AL |
155 | return NULL; |
156 | ||
ec403e2a | 157 | new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL_ACCOUNT); |
37868fe1 AL |
158 | if (!new_ldt) |
159 | return NULL; | |
160 | ||
161 | BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct)); | |
bbf79d21 | 162 | alloc_size = num_entries * LDT_ENTRY_SIZE; |
37868fe1 AL |
163 | |
164 | /* | |
165 | * Xen is very picky: it requires a page-aligned LDT that has no | |
166 | * trailing nonzero bytes in any page that contains LDT descriptors. | |
167 | * Keep it simple: zero the whole allocation and never allocate less | |
168 | * than PAGE_SIZE. | |
169 | */ | |
170 | if (alloc_size > PAGE_SIZE) | |
ec403e2a | 171 | new_ldt->entries = __vmalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); |
1da177e4 | 172 | else |
ec403e2a | 173 | new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); |
1da177e4 | 174 | |
37868fe1 AL |
175 | if (!new_ldt->entries) { |
176 | kfree(new_ldt); | |
177 | return NULL; | |
178 | } | |
77e463d1 | 179 | |
f55f0501 AL |
180 | /* The new LDT isn't aliased for PTI yet. */ |
181 | new_ldt->slot = -1; | |
182 | ||
bbf79d21 | 183 | new_ldt->nr_entries = num_entries; |
37868fe1 AL |
184 | return new_ldt; |
185 | } | |
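To make the PAGE_SIZE cutoff concrete: at 8 bytes per descriptor and 4 KiB pages, up to 512 entries fit in the single zeroed page, and anything larger (up to the 8192-entry maximum, i.e. 64 KiB) takes the vmalloc path. A tiny restatement of that size rule, under those assumed constants:

```c
/* Sketch only: mirrors the alloc_size check in alloc_ldt_struct(),
 * assuming LDT_ENTRY_SIZE == 8 and PAGE_SIZE == 4096. */
static int ldt_alloc_needs_vmalloc(unsigned int num_entries)
{
	return num_entries * 8 > 4096;	/* more than 512 entries */
}
```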
38ffbe66 | 186 | |
9bae3197 JR |
187 | #ifdef CONFIG_PAGE_TABLE_ISOLATION |
188 | ||
189 | static void do_sanity_check(struct mm_struct *mm, | |
190 | bool had_kernel_mapping, | |
191 | bool had_user_mapping) | |
192 | { | |
193 | if (mm->context.ldt) { | |
194 | /* | |
195 | * We already had an LDT. The top-level entry should already | |
196 | * have been allocated and synchronized with the usermode | |
197 | * tables. | |
198 | */ | |
199 | WARN_ON(!had_kernel_mapping); | |
67e87d43 | 200 | if (boot_cpu_has(X86_FEATURE_PTI)) |
9bae3197 JR |
201 | WARN_ON(!had_user_mapping); |
202 | } else { | |
203 | /* | |
204 | * This is the first time we're mapping an LDT for this process. | |
205 | * Sync the pgd to the usermode tables. | |
206 | */ | |
207 | WARN_ON(had_kernel_mapping); | |
67e87d43 | 208 | if (boot_cpu_has(X86_FEATURE_PTI)) |
9bae3197 JR |
209 | WARN_ON(had_user_mapping); |
210 | } | |
211 | } | |
212 | ||
6df934b9 JR |
213 | #ifdef CONFIG_X86_PAE |
214 | ||
215 | static pmd_t *pgd_to_pmd_walk(pgd_t *pgd, unsigned long va) | |
216 | { | |
217 | p4d_t *p4d; | |
218 | pud_t *pud; | |
219 | ||
220 | if (pgd->pgd == 0) | |
221 | return NULL; | |
222 | ||
223 | p4d = p4d_offset(pgd, va); | |
224 | if (p4d_none(*p4d)) | |
225 | return NULL; | |
226 | ||
227 | pud = pud_offset(p4d, va); | |
228 | if (pud_none(*pud)) | |
229 | return NULL; | |
230 | ||
231 | return pmd_offset(pud, va); | |
232 | } | |
233 | ||
234 | static void map_ldt_struct_to_user(struct mm_struct *mm) | |
235 | { | |
236 | pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR); | |
237 | pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd); | |
238 | pmd_t *k_pmd, *u_pmd; | |
239 | ||
240 | k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR); | |
241 | u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR); | |
242 | ||
67e87d43 | 243 | if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt) |
6df934b9 JR |
244 | set_pmd(u_pmd, *k_pmd); |
245 | } | |
246 | ||
247 | static void sanity_check_ldt_mapping(struct mm_struct *mm) | |
248 | { | |
249 | pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR); | |
250 | pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd); | |
251 | bool had_kernel, had_user; | |
252 | pmd_t *k_pmd, *u_pmd; | |
253 | ||
254 | k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR); | |
255 | u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR); | |
256 | had_kernel = (k_pmd->pmd != 0); | |
257 | had_user = (u_pmd->pmd != 0); | |
258 | ||
259 | do_sanity_check(mm, had_kernel, had_user); | |
260 | } | |
261 | ||
262 | #else /* !CONFIG_X86_PAE */ | |
263 | ||
9bae3197 JR |
264 | static void map_ldt_struct_to_user(struct mm_struct *mm) |
265 | { | |
266 | pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR); | |
267 | ||
67e87d43 | 268 | if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt) |
9bae3197 JR |
269 | set_pgd(kernel_to_user_pgdp(pgd), *pgd); |
270 | } | |
271 | ||
272 | static void sanity_check_ldt_mapping(struct mm_struct *mm) | |
273 | { | |
274 | pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR); | |
275 | bool had_kernel = (pgd->pgd != 0); | |
276 | bool had_user = (kernel_to_user_pgdp(pgd)->pgd != 0); | |
277 | ||
278 | do_sanity_check(mm, had_kernel, had_user); | |
279 | } | |
280 | ||
6df934b9 JR |
281 | #endif /* CONFIG_X86_PAE */ |
282 | ||
f55f0501 AL |
283 | /* |
284 | * If PTI is enabled, this maps the LDT into the kernelmode and | |
285 | * usermode tables for the given mm. | |
f55f0501 AL |
286 | */ |
287 | static int | |
288 | map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) | |
289 | { | |
f55f0501 | 290 | unsigned long va; |
9bae3197 | 291 | bool is_vmalloc; |
f55f0501 | 292 | spinlock_t *ptl; |
a0e6e083 | 293 | int i, nr_pages; |
f55f0501 | 294 | |
67e87d43 | 295 | if (!boot_cpu_has(X86_FEATURE_PTI)) |
f55f0501 AL |
296 | return 0; |
297 | ||
298 | /* | |
299 | * Any given ldt_struct should have map_ldt_struct() called at most | |
300 | * once. | |
301 | */ | |
302 | WARN_ON(ldt->slot != -1); | |
303 | ||
9bae3197 JR |
304 | /* Check if the current mappings are sane */ |
305 | sanity_check_ldt_mapping(mm); | |
306 | ||
f55f0501 AL |
307 | is_vmalloc = is_vmalloc_addr(ldt->entries); |
308 | ||
a0e6e083 KS |
309 | nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE); |
310 | ||
311 | for (i = 0; i < nr_pages; i++) { | |
f55f0501 AL |
312 | unsigned long offset = i << PAGE_SHIFT; |
313 | const void *src = (char *)ldt->entries + offset; | |
314 | unsigned long pfn; | |
fb43d6cb | 315 | pgprot_t pte_prot; |
f55f0501 AL |
316 | pte_t pte, *ptep; |
317 | ||
318 | va = (unsigned long)ldt_slot_va(slot) + offset; | |
319 | pfn = is_vmalloc ? vmalloc_to_pfn(src) : | |
320 | page_to_pfn(virt_to_page(src)); | |
321 | /* | |
322 | * Treat the PTI LDT range as a *userspace* range. | |
323 | * get_locked_pte() will allocate all needed pagetables | |
324 | * and account for them in this mm. | |
325 | */ | |
326 | ptep = get_locked_pte(mm, va, &ptl); | |
327 | if (!ptep) | |
328 | return -ENOMEM; | |
9f5cb6b3 TG |
329 | /* |
330 | * Map it RO so this easy-to-find address is not a primary |
331 | * target via some kernel interface that misses a |
332 | * permission check. |
333 | */ | |
fb43d6cb DH |
334 | pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL); |
335 | /* Filter out unsupported __PAGE_KERNEL* bits: */ |
e6f39e87 | 336 | pgprot_val(pte_prot) &= __supported_pte_mask; |
fb43d6cb | 337 | pte = pfn_pte(pfn, pte_prot); |
f55f0501 AL |
338 | set_pte_at(mm, va, ptep, pte); |
339 | pte_unmap_unlock(ptep, ptl); | |
340 | } | |
341 | ||
9bae3197 JR |
342 | /* Propagate LDT mapping to the user page-table */ |
343 | map_ldt_struct_to_user(mm); | |
f55f0501 | 344 | |
f55f0501 | 345 | ldt->slot = slot; |
f55f0501 AL |
346 | return 0; |
347 | } | |
348 | ||
a0e6e083 KS |
349 | static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) |
350 | { | |
351 | unsigned long va; | |
352 | int i, nr_pages; | |
353 | ||
354 | if (!ldt) | |
355 | return; | |
356 | ||
357 | /* LDT map/unmap is only required for PTI */ | |
67e87d43 | 358 | if (!boot_cpu_has(X86_FEATURE_PTI)) |
a0e6e083 KS |
359 | return; |
360 | ||
361 | nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE); | |
362 | ||
363 | for (i = 0; i < nr_pages; i++) { | |
364 | unsigned long offset = i << PAGE_SHIFT; | |
365 | spinlock_t *ptl; | |
366 | pte_t *ptep; | |
367 | ||
368 | va = (unsigned long)ldt_slot_va(ldt->slot) + offset; | |
369 | ptep = get_locked_pte(mm, va, &ptl); | |
370 | pte_clear(mm, va, ptep); | |
371 | pte_unmap_unlock(ptep, ptl); | |
372 | } | |
373 | ||
374 | va = (unsigned long)ldt_slot_va(ldt->slot); | |
375 | flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false); | |
376 | } | |
377 | ||
9bae3197 JR |
378 | #else /* !CONFIG_PAGE_TABLE_ISOLATION */ |
379 | ||
380 | static int | |
381 | map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot) | |
382 | { | |
383 | return 0; | |
384 | } | |
a0e6e083 KS |
385 | |
386 | static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt) | |
387 | { | |
388 | } | |
9bae3197 JR |
389 | #endif /* CONFIG_PAGE_TABLE_ISOLATION */ |
390 | ||
f55f0501 AL |
391 | static void free_ldt_pgtables(struct mm_struct *mm) |
392 | { | |
393 | #ifdef CONFIG_PAGE_TABLE_ISOLATION | |
394 | struct mmu_gather tlb; | |
395 | unsigned long start = LDT_BASE_ADDR; | |
8195d869 | 396 | unsigned long end = LDT_END_ADDR; |
f55f0501 | 397 | |
67e87d43 | 398 | if (!boot_cpu_has(X86_FEATURE_PTI)) |
f55f0501 AL |
399 | return; |
400 | ||
8cf55f24 WD |
401 | /* |
402 | * Although free_pgd_range() is intended for freeing user | |
403 | * page-tables, it also works out for kernel mappings on x86. | |
404 | * We use tlb_gather_mmu_fullmm() to avoid confusing the | |
405 | * range-tracking logic in __tlb_adjust_range(). | |
406 | */ | |
407 | tlb_gather_mmu_fullmm(&tlb, mm); | |
f55f0501 | 408 | free_pgd_range(&tlb, start, end, start, end); |
ae8eba8b | 409 | tlb_finish_mmu(&tlb); |
f55f0501 AL |
410 | #endif |
411 | } | |
412 | ||
37868fe1 AL |
413 | /* After calling this, the LDT is immutable. */ |
414 | static void finalize_ldt_struct(struct ldt_struct *ldt) | |
415 | { | |
bbf79d21 | 416 | paravirt_alloc_ldt(ldt->entries, ldt->nr_entries); |
1da177e4 LT |
417 | } |
418 | ||
c2b3496b | 419 | static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt) |
1da177e4 | 420 | { |
c2b3496b PZ |
421 | mutex_lock(&mm->context.lock); |
422 | ||
3382290e | 423 | /* Synchronizes with READ_ONCE in load_mm_ldt. */ |
c2b3496b | 424 | smp_store_release(&mm->context.ldt, ldt); |
37868fe1 | 425 | |
c2b3496b PZ |
426 | /* Activate the LDT for all CPUs using current's mm. */ |
427 | on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true); | |
428 | ||
429 | mutex_unlock(&mm->context.lock); | |
37868fe1 | 430 | } |
78aa1f66 | 431 | |
37868fe1 AL |
432 | static void free_ldt_struct(struct ldt_struct *ldt) |
433 | { | |
434 | if (likely(!ldt)) | |
435 | return; | |
38ffbe66 | 436 | |
bbf79d21 BP |
437 | paravirt_free_ldt(ldt->entries, ldt->nr_entries); |
438 | if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE) | |
8d5341a6 | 439 | vfree_atomic(ldt->entries); |
37868fe1 | 440 | else |
f454b478 | 441 | free_page((unsigned long)ldt->entries); |
37868fe1 | 442 | kfree(ldt); |
1da177e4 LT |
443 | } |
444 | ||
445 | /* | |
a4828f81 TG |
446 | * Called on fork from arch_dup_mmap(). Just copy the current LDT state; |
447 | * the new task is not running, so nothing can be installed. | |
1da177e4 | 448 | */ |
a4828f81 | 449 | int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm) |
1da177e4 | 450 | { |
37868fe1 | 451 | struct ldt_struct *new_ldt; |
1da177e4 LT |
452 | int retval = 0; |
453 | ||
a4828f81 | 454 | if (!old_mm) |
37868fe1 | 455 | return 0; |
37868fe1 AL |
456 | |
457 | mutex_lock(&old_mm->context.lock); | |
a4828f81 | 458 | if (!old_mm->context.ldt) |
37868fe1 | 459 | goto out_unlock; |
37868fe1 | 460 | |
bbf79d21 | 461 | new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries); |
37868fe1 AL |
462 | if (!new_ldt) { |
463 | retval = -ENOMEM; | |
464 | goto out_unlock; | |
465 | } | |
466 | ||
467 | memcpy(new_ldt->entries, old_mm->context.ldt->entries, | |
bbf79d21 | 468 | new_ldt->nr_entries * LDT_ENTRY_SIZE); |
37868fe1 AL |
469 | finalize_ldt_struct(new_ldt); |
470 | ||
f55f0501 AL |
471 | retval = map_ldt_struct(mm, new_ldt, 0); |
472 | if (retval) { | |
473 | free_ldt_pgtables(mm); | |
474 | free_ldt_struct(new_ldt); | |
475 | goto out_unlock; | |
476 | } | |
37868fe1 AL |
477 | mm->context.ldt = new_ldt; |
478 | ||
479 | out_unlock: | |
480 | mutex_unlock(&old_mm->context.lock); | |
1da177e4 LT |
481 | return retval; |
482 | } | |
483 | ||
484 | /* | |
77e463d1 TG |
485 | * No need to lock the MM, as we are the last user. |
486 | * | |
487 | * 64bit: Don't touch the LDT register - we're already in the next thread. | |
1da177e4 | 488 | */ |
39a0526f | 489 | void destroy_context_ldt(struct mm_struct *mm) |
1da177e4 | 490 | { |
37868fe1 AL |
491 | free_ldt_struct(mm->context.ldt); |
492 | mm->context.ldt = NULL; | |
1da177e4 LT |
493 | } |
494 | ||
f55f0501 AL |
495 | void ldt_arch_exit_mmap(struct mm_struct *mm) |
496 | { | |
497 | free_ldt_pgtables(mm); | |
498 | } | |
499 | ||
78aa1f66 | 500 | static int read_ldt(void __user *ptr, unsigned long bytecount) |
1da177e4 | 501 | { |
78aa1f66 | 502 | struct mm_struct *mm = current->mm; |
bbf79d21 BP |
503 | unsigned long entries_size; |
504 | int retval; | |
1da177e4 | 505 | |
c2b3496b | 506 | down_read(&mm->context.ldt_usr_sem); |
37868fe1 AL |
507 | |
508 | if (!mm->context.ldt) { | |
509 | retval = 0; | |
510 | goto out_unlock; | |
511 | } | |
512 | ||
78aa1f66 TG |
513 | if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) |
514 | bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; | |
1da177e4 | 515 | |
bbf79d21 BP |
516 | entries_size = mm->context.ldt->nr_entries * LDT_ENTRY_SIZE; |
517 | if (entries_size > bytecount) | |
518 | entries_size = bytecount; | |
1da177e4 | 519 | |
bbf79d21 | 520 | if (copy_to_user(ptr, mm->context.ldt->entries, entries_size)) { |
37868fe1 AL |
521 | retval = -EFAULT; |
522 | goto out_unlock; | |
523 | } | |
524 | ||
bbf79d21 | 525 | if (entries_size != bytecount) { |
37868fe1 | 526 | /* Zero-fill the rest and pretend we read bytecount bytes. */ |
bbf79d21 | 527 | if (clear_user(ptr + entries_size, bytecount - entries_size)) { |
37868fe1 AL |
528 | retval = -EFAULT; |
529 | goto out_unlock; | |
1da177e4 LT |
530 | } |
531 | } | |
37868fe1 AL |
532 | retval = bytecount; |
533 | ||
534 | out_unlock: | |
c2b3496b | 535 | up_read(&mm->context.ldt_usr_sem); |
37868fe1 | 536 | return retval; |
1da177e4 LT |
537 | } |
538 | ||
78aa1f66 | 539 | static int read_default_ldt(void __user *ptr, unsigned long bytecount) |
1da177e4 | 540 | { |
77e463d1 TG |
541 | /* CHECKME: Can we use _one_ random number? */ |
542 | #ifdef CONFIG_X86_32 | |
543 | unsigned long size = 5 * sizeof(struct desc_struct); | |
544 | #else | |
545 | unsigned long size = 128; | |
546 | #endif | |
547 | if (bytecount > size) | |
548 | bytecount = size; | |
1da177e4 LT |
549 | if (clear_user(ptr, bytecount)) |
550 | return -EFAULT; | |
78aa1f66 | 551 | return bytecount; |
1da177e4 LT |
552 | } |
553 | ||
cc801833 AL |
554 | static bool allow_16bit_segments(void) |
555 | { | |
556 | if (!IS_ENABLED(CONFIG_X86_16BIT)) | |
557 | return false; | |
558 | ||
559 | #ifdef CONFIG_XEN_PV | |
560 | /* | |
561 | * Xen PV does not implement ESPFIX64, which means that 16-bit | |
562 | * segments will not work correctly. Until either Xen PV implements | |
563 | * ESPFIX64 and can signal this fact to the guest, or someone |
564 | * provides compelling evidence that allowing broken 16-bit segments | |
565 | * is worthwhile, disallow 16-bit segments under Xen PV. | |
566 | */ | |
567 | if (xen_pv_domain()) { | |
bb5a93aa | 568 | pr_info_once("Warning: 16-bit segments do not work correctly in a Xen PV guest\n"); |
cc801833 AL |
569 | return false; |
570 | } | |
571 | #endif | |
572 | ||
573 | return true; | |
574 | } | |
575 | ||
78aa1f66 | 576 | static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) |
1da177e4 | 577 | { |
70f5088d | 578 | struct mm_struct *mm = current->mm; |
990e9dc3 | 579 | struct ldt_struct *new_ldt, *old_ldt; |
bbf79d21 | 580 | unsigned int old_nr_entries, new_nr_entries; |
990e9dc3 | 581 | struct user_desc ldt_info; |
5af72502 | 582 | struct desc_struct ldt; |
1da177e4 | 583 | int error; |
1da177e4 LT |
584 | |
585 | error = -EINVAL; | |
1da177e4 LT |
586 | if (bytecount != sizeof(ldt_info)) |
587 | goto out; | |
78aa1f66 | 588 | error = -EFAULT; |
70f5088d | 589 | if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info))) |
1da177e4 LT |
590 | goto out; |
591 | ||
592 | error = -EINVAL; | |
593 | if (ldt_info.entry_number >= LDT_ENTRIES) | |
594 | goto out; | |
595 | if (ldt_info.contents == 3) { | |
596 | if (oldmode) | |
597 | goto out; | |
598 | if (ldt_info.seg_not_present == 0) | |
599 | goto out; | |
600 | } | |
601 | ||
37868fe1 AL |
602 | if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) || |
603 | LDT_empty(&ldt_info)) { | |
604 | /* The user wants to clear the entry. */ | |
605 | memset(&ldt, 0, sizeof(ldt)); | |
606 | } else { | |
cc801833 | 607 | if (!ldt_info.seg_32bit && !allow_16bit_segments()) { |
37868fe1 AL |
608 | error = -EINVAL; |
609 | goto out; | |
1da177e4 | 610 | } |
37868fe1 AL |
611 | |
612 | fill_ldt(&ldt, &ldt_info); | |
613 | if (oldmode) | |
614 | ldt.avl = 0; | |
1da177e4 LT |
615 | } |
616 | ||
c2b3496b PZ |
617 | if (down_write_killable(&mm->context.ldt_usr_sem)) |
618 | return -EINTR; | |
37868fe1 | 619 | |
bbf79d21 BP |
620 | old_ldt = mm->context.ldt; |
621 | old_nr_entries = old_ldt ? old_ldt->nr_entries : 0; | |
622 | new_nr_entries = max(ldt_info.entry_number + 1, old_nr_entries); | |
37868fe1 AL |
623 | |
624 | error = -ENOMEM; | |
bbf79d21 | 625 | new_ldt = alloc_ldt_struct(new_nr_entries); |
37868fe1 | 626 | if (!new_ldt) |
34273f41 | 627 | goto out_unlock; |
34273f41 | 628 | |
37868fe1 | 629 | if (old_ldt) |
bbf79d21 BP |
630 | memcpy(new_ldt->entries, old_ldt->entries, old_nr_entries * LDT_ENTRY_SIZE); |
631 | ||
37868fe1 AL |
632 | new_ldt->entries[ldt_info.entry_number] = ldt; |
633 | finalize_ldt_struct(new_ldt); | |
1da177e4 | 634 | |
f55f0501 AL |
635 | /* |
636 | * If we are using PTI, map the new LDT into the userspace pagetables. | |
637 | * If there is already an LDT, use the other slot so that other CPUs | |
638 | * will continue to use the old LDT until install_ldt() switches | |
639 | * them over to the new LDT. | |
640 | */ | |
641 | error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0); | |
642 | if (error) { | |
a62d6985 TG |
643 | /* |
644 | * This can only fail for the first LDT setup. If an LDT is |
645 | * already installed, then the PTE page is already |
646 | * populated. Mop up a half-populated page table. |
647 | */ | |
7f414195 TG |
648 | if (!WARN_ON_ONCE(old_ldt)) |
649 | free_ldt_pgtables(mm); | |
a62d6985 | 650 | free_ldt_struct(new_ldt); |
f55f0501 AL |
651 | goto out_unlock; |
652 | } | |
653 | ||
37868fe1 | 654 | install_ldt(mm, new_ldt); |
a0e6e083 | 655 | unmap_ldt_struct(mm, old_ldt); |
37868fe1 | 656 | free_ldt_struct(old_ldt); |
1da177e4 LT |
657 | error = 0; |
658 | ||
659 | out_unlock: | |
c2b3496b | 660 | up_write(&mm->context.ldt_usr_sem); |
1da177e4 LT |
661 | out: |
662 | return error; | |
663 | } | |
664 | ||
da20ab35 DH |
665 | SYSCALL_DEFINE3(modify_ldt, int, func, void __user *, ptr, |
666 | unsigned long, bytecount) |
1da177e4 LT |
667 | { |
668 | int ret = -ENOSYS; | |
669 | ||
670 | switch (func) { | |
671 | case 0: | |
672 | ret = read_ldt(ptr, bytecount); | |
673 | break; | |
674 | case 1: | |
675 | ret = write_ldt(ptr, bytecount, 1); | |
676 | break; | |
677 | case 2: | |
678 | ret = read_default_ldt(ptr, bytecount); | |
679 | break; | |
680 | case 0x11: | |
681 | ret = write_ldt(ptr, bytecount, 0); | |
682 | break; | |
683 | } | |
da20ab35 DH |
684 | /* |
685 | * The SYSCALL_DEFINE() macros give us an 'unsigned long' | |
686 | * return type, but the ABI for sys_modify_ldt() expects |
687 | * 'int'. This cast gives us an int-sized value in %rax | |
688 | * for the return code. The 'unsigned' is necessary so | |
689 | * the compiler does not try to sign-extend the negative | |
690 | * return codes into the high half of the register when | |
691 | * taking the value from int->long. | |
692 | */ | |
693 | return (unsigned int)ret; | |
1da177e4 | 694 | } |
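For context, a minimal user-space sketch of the ABI handled above. modify_ldt(2) has no glibc wrapper, so it is invoked through syscall(2); func 0x11 writes one entry in new mode and func 0 reads the table back, with the short-read zero-fill behavior implemented in read_ldt() (x86 Linux only; error handling kept minimal):

```c
#include <asm/ldt.h>		/* struct user_desc, LDT_ENTRY_SIZE */
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct user_desc desc;
	char buf[16 * LDT_ENTRY_SIZE];
	long n;

	memset(&desc, 0, sizeof(desc));
	desc.entry_number    = 0;
	desc.base_addr       = 0;
	desc.limit           = 0xfffff;
	desc.seg_32bit       = 1;
	desc.limit_in_pages  = 1;
	desc.contents        = 0;	/* data segment */
	desc.seg_not_present = 0;

	/* func 0x11: install one LDT entry (new-mode write_ldt()). */
	if (syscall(SYS_modify_ldt, 0x11, &desc, sizeof(desc)) != 0) {
		perror("modify_ldt(write)");
		return 1;
	}

	/* func 0: read the LDT back; the tail beyond the installed
	 * entries is zero-filled and the byte count is returned. */
	n = syscall(SYS_modify_ldt, 0, buf, sizeof(buf));
	printf("read %ld bytes of LDT\n", n);
	return n < 0;
}
```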