Commit | Line | Data |
---|---|---|
3f705dfd AL |
1 | /* |
2 | * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace | |
3 | * Copyright (c) 2014-2015 Andrew Lutomirski | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify | |
6 | * it under the terms and conditions of the GNU General Public License, | |
7 | * version 2, as published by the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope it will be useful, but | |
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | * General Public License for more details. | |
13 | * | |
14 | * This is a series of tests that exercises the sigreturn(2) syscall and | |
15 | * the IRET / SYSRET paths in the kernel. | |
16 | * | |
17 | * For now, this focuses on the effects of unusual CS and SS values, | |
18 | * and it has a bunch of tests to make sure that ESP/RSP is restored | |
19 | * properly. | |
20 | * | |
21 | * The basic idea behind these tests is to raise(SIGUSR1) to create a | |
22 | * sigcontext frame, plug in the values to be tested, and then return, | |
23 | * which implicitly invokes sigreturn(2) and programs the user context | |
24 | * as desired. | |
25 | * | |
26 | * For tests for which we expect sigreturn and the subsequent return to | |
27 | * user mode to succeed, we return to a short trampoline that generates | |
28 | * SIGTRAP so that the meat of the tests can be ordinary C code in a | |
29 | * SIGTRAP handler. | |
30 | * | |
31 | * The inner workings of each test is documented below. | |
32 | * | |
33 | * Do not run on outdated, unpatched kernels at risk of nasty crashes. | |
34 | */ | |
35 | ||
36 | #define _GNU_SOURCE | |
37 | ||
38 | #include <sys/time.h> | |
39 | #include <time.h> | |
40 | #include <stdlib.h> | |
41 | #include <sys/syscall.h> | |
42 | #include <unistd.h> | |
43 | #include <stdio.h> | |
44 | #include <string.h> | |
45 | #include <inttypes.h> | |
46 | #include <sys/mman.h> | |
47 | #include <sys/signal.h> | |
48 | #include <sys/ucontext.h> | |
49 | #include <asm/ldt.h> | |
50 | #include <err.h> | |
51 | #include <setjmp.h> | |
52 | #include <stddef.h> | |
53 | #include <stdbool.h> | |
54 | #include <sys/ptrace.h> | |
55 | #include <sys/user.h> | |
56 | ||
4f6c8938 AL |
57 | /* Pull in AR_xyz defines. */ |
58 | typedef unsigned int u32; | |
59 | typedef unsigned short u16; | |
60 | #include "../../../../arch/x86/include/asm/desc_defs.h" | |
61 | ||
62 | /* | |
63 | * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc | |
64 | * headers. | |
65 | */ | |
66 | #ifdef __x86_64__ | |
67 | /* | |
68 | * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on | |
69 | * kernels that save SS in the sigcontext. All kernels that set | |
70 | * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp | |
71 | * regardless of SS (i.e. they implement espfix). | |
72 | * | |
73 | * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS | |
74 | * when delivering a signal that came from 64-bit code. | |
75 | * | |
76 | * Sigreturn restores SS as follows: | |
77 | * | |
78 | * if (saved SS is valid || UC_STRICT_RESTORE_SS is set || | |
79 | * saved CS is not 64-bit) | |
80 | * new SS = saved SS (will fail IRET and signal if invalid) | |
81 | * else | |
82 | * new SS = a flat 32-bit data segment | |
83 | */ | |
84 | #define UC_SIGCONTEXT_SS 0x2 | |
85 | #define UC_STRICT_RESTORE_SS 0x4 | |
86 | #endif | |
87 | ||
3f705dfd AL |
88 | /* |
89 | * In principle, this test can run on Linux emulation layers (e.g. | |
90 | * Illumos "LX branded zones"). Solaris-based kernels reserve LDT | |
91 | * entries 0-5 for their own internal purposes, so start our LDT | |
92 | * allocations above that reservation. (The tests don't pass on LX | |
93 | * branded zones, but at least this lets them run.) | |
94 | */ | |
95 | #define LDT_OFFSET 6 | |
96 | ||
97 | /* An aligned stack accessible through some of our segments. */ | |
98 | static unsigned char stack16[65536] __attribute__((aligned(4096))); | |
99 | ||
100 | /* | |
101 | * An aligned int3 instruction used as a trampoline. Some of the tests | |
102 | * want to fish out their ss values, so this trampoline copies ss to ecx | |
103 | * before the int3. | |
104 | */ | |
105 | asm (".pushsection .text\n\t" | |
106 | ".type int3, @function\n\t" | |
107 | ".align 4096\n\t" | |
108 | "int3:\n\t" | |
1ef0199a | 109 | "mov %ss,%ecx\n\t" |
3f705dfd AL |
110 | "int3\n\t" |
111 | ".size int3, . - int3\n\t" | |
112 | ".align 4096, 0xcc\n\t" | |
113 | ".popsection"); | |
114 | extern char int3[4096]; | |
115 | ||
116 | /* | |
117 | * At startup, we prepare: | |
118 | * | |
119 | * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero | |
120 | * descriptor or out of bounds). | |
121 | * - code16_sel: A 16-bit LDT code segment pointing to int3. | |
122 | * - data16_sel: A 16-bit LDT data segment pointing to stack16. | |
123 | * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3. | |
124 | * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16. | |
125 | * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16. | |
126 | * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to | |
127 | * stack16. | |
128 | * | |
129 | * For no particularly good reason, xyz_sel is a selector value with the | |
130 | * RPL and LDT bits filled in, whereas xyz_idx is just an index into the | |
131 | * descriptor table. These variables will be zero if their respective | |
132 | * segments could not be allocated. | |
133 | */ | |
134 | static unsigned short ldt_nonexistent_sel; | |
135 | static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel; | |
136 | ||
137 | static unsigned short gdt_data16_idx, gdt_npdata32_idx; | |
138 | ||
/* Turns a GDT descriptor index into a selector value with the low bits set to 3. */
static unsigned short GDT3(int idx)
{
	int selector = idx << 3;

	selector |= 3;
	return selector;
}
143 | ||
/* Turns an LDT descriptor index into a selector value with the low bits set to 7. */
static unsigned short LDT3(int idx)
{
	int selector = idx << 3;

	selector |= 7;
	return selector;
}
148 | ||
149 | /* Our sigaltstack scratch space. */ | |
150 | static char altstack_data[SIGSTKSZ]; | |
151 | ||
152 | static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), | |
153 | int flags) | |
154 | { | |
155 | struct sigaction sa; | |
156 | memset(&sa, 0, sizeof(sa)); | |
157 | sa.sa_sigaction = handler; | |
158 | sa.sa_flags = SA_SIGINFO | flags; | |
159 | sigemptyset(&sa.sa_mask); | |
160 | if (sigaction(sig, &sa, 0)) | |
161 | err(1, "sigaction"); | |
162 | } | |
163 | ||
164 | static void clearhandler(int sig) | |
165 | { | |
166 | struct sigaction sa; | |
167 | memset(&sa, 0, sizeof(sa)); | |
168 | sa.sa_handler = SIG_DFL; | |
169 | sigemptyset(&sa.sa_mask); | |
170 | if (sigaction(sig, &sa, 0)) | |
171 | err(1, "sigaction"); | |
172 | } | |
173 | ||
/*
 * Asks the kernel to install @desc through the modify_ldt syscall.
 *
 * On success *@var receives the LDT selector for desc->entry_number; on
 * failure a note mentioning @name is printed and *@var is set to 0.
 */
static void add_ldt(const struct user_desc *desc, unsigned short *var,
		    const char *name)
{
	long rc = syscall(SYS_modify_ldt, 1, desc, sizeof(*desc));

	if (rc != 0) {
		printf("[NOTE]\tFailed to create %s segment\n", name);
		*var = 0;
		return;
	}

	*var = LDT3(desc->entry_number);
}
184 | ||
185 | static void setup_ldt(void) | |
186 | { | |
187 | if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16)) | |
188 | errx(1, "stack16 is too high\n"); | |
189 | if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3)) | |
190 | errx(1, "int3 is too high\n"); | |
191 | ||
192 | ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2); | |
193 | ||
194 | const struct user_desc code16_desc = { | |
195 | .entry_number = LDT_OFFSET + 0, | |
196 | .base_addr = (unsigned long)int3, | |
197 | .limit = 4095, | |
198 | .seg_32bit = 0, | |
199 | .contents = 2, /* Code, not conforming */ | |
200 | .read_exec_only = 0, | |
201 | .limit_in_pages = 0, | |
202 | .seg_not_present = 0, | |
203 | .useable = 0 | |
204 | }; | |
205 | add_ldt(&code16_desc, &code16_sel, "code16"); | |
206 | ||
207 | const struct user_desc data16_desc = { | |
208 | .entry_number = LDT_OFFSET + 1, | |
209 | .base_addr = (unsigned long)stack16, | |
210 | .limit = 0xffff, | |
211 | .seg_32bit = 0, | |
212 | .contents = 0, /* Data, grow-up */ | |
213 | .read_exec_only = 0, | |
214 | .limit_in_pages = 0, | |
215 | .seg_not_present = 0, | |
216 | .useable = 0 | |
217 | }; | |
218 | add_ldt(&data16_desc, &data16_sel, "data16"); | |
219 | ||
220 | const struct user_desc npcode32_desc = { | |
221 | .entry_number = LDT_OFFSET + 3, | |
222 | .base_addr = (unsigned long)int3, | |
223 | .limit = 4095, | |
224 | .seg_32bit = 1, | |
225 | .contents = 2, /* Code, not conforming */ | |
226 | .read_exec_only = 0, | |
227 | .limit_in_pages = 0, | |
228 | .seg_not_present = 1, | |
229 | .useable = 0 | |
230 | }; | |
231 | add_ldt(&npcode32_desc, &npcode32_sel, "npcode32"); | |
232 | ||
233 | const struct user_desc npdata32_desc = { | |
234 | .entry_number = LDT_OFFSET + 4, | |
235 | .base_addr = (unsigned long)stack16, | |
236 | .limit = 0xffff, | |
237 | .seg_32bit = 1, | |
238 | .contents = 0, /* Data, grow-up */ | |
239 | .read_exec_only = 0, | |
240 | .limit_in_pages = 0, | |
241 | .seg_not_present = 1, | |
242 | .useable = 0 | |
243 | }; | |
244 | add_ldt(&npdata32_desc, &npdata32_sel, "npdata32"); | |
245 | ||
246 | struct user_desc gdt_data16_desc = { | |
247 | .entry_number = -1, | |
248 | .base_addr = (unsigned long)stack16, | |
249 | .limit = 0xffff, | |
250 | .seg_32bit = 0, | |
251 | .contents = 0, /* Data, grow-up */ | |
252 | .read_exec_only = 0, | |
253 | .limit_in_pages = 0, | |
254 | .seg_not_present = 0, | |
255 | .useable = 0 | |
256 | }; | |
257 | ||
258 | if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) { | |
259 | /* | |
260 | * This probably indicates vulnerability to CVE-2014-8133. | |
261 | * Merely getting here isn't definitive, though, and we'll | |
262 | * diagnose the problem for real later on. | |
263 | */ | |
264 | printf("[WARN]\tset_thread_area allocated data16 at index %d\n", | |
265 | gdt_data16_desc.entry_number); | |
266 | gdt_data16_idx = gdt_data16_desc.entry_number; | |
267 | } else { | |
268 | printf("[OK]\tset_thread_area refused 16-bit data\n"); | |
269 | } | |
270 | ||
271 | struct user_desc gdt_npdata32_desc = { | |
272 | .entry_number = -1, | |
273 | .base_addr = (unsigned long)stack16, | |
274 | .limit = 0xffff, | |
275 | .seg_32bit = 1, | |
276 | .contents = 0, /* Data, grow-up */ | |
277 | .read_exec_only = 0, | |
278 | .limit_in_pages = 0, | |
279 | .seg_not_present = 1, | |
280 | .useable = 0 | |
281 | }; | |
282 | ||
283 | if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) { | |
284 | /* | |
285 | * As a hardening measure, newer kernels don't allow this. | |
286 | */ | |
287 | printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n", | |
288 | gdt_npdata32_desc.entry_number); | |
289 | gdt_npdata32_idx = gdt_npdata32_desc.entry_number; | |
290 | } else { | |
291 | printf("[OK]\tset_thread_area refused 16-bit data\n"); | |
292 | } | |
293 | } | |
294 | ||
295 | /* State used by our signal handlers. */ | |
296 | static gregset_t initial_regs, requested_regs, resulting_regs; | |
297 | ||
298 | /* Instructions for the SIGUSR1 handler. */ | |
299 | static volatile unsigned short sig_cs, sig_ss; | |
300 | static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno; | |
4f6c8938 AL |
301 | #ifdef __x86_64__ |
302 | static volatile sig_atomic_t sig_corrupt_final_ss; | |
303 | #endif | |
3f705dfd AL |
304 | |
/*
 * Abstractions for some 32-bit vs 64-bit differences: common aliases for
 * the IP/SP/CX register indices, plus accessors for the CS and SS values
 * stored in a signal frame.
 */
#ifdef __x86_64__
# define REG_IP REG_RIP
# define REG_SP REG_RSP
# define REG_CX REG_RCX

/* Four 16-bit selectors overlaid on the single REG_CSGSFS register slot. */
struct selectors {
	unsigned short cs, gs, fs, ss;
};

/* Returns a pointer to the SS field inside the REG_CSGSFS slot of @ctx. */
static unsigned short *ssptr(ucontext_t *ctx)
{
	void *slot = &ctx->uc_mcontext.gregs[REG_CSGSFS];

	return &((struct selectors *)slot)->ss;
}

/* Returns a pointer to the CS field inside the REG_CSGSFS slot of @ctx. */
static unsigned short *csptr(ucontext_t *ctx)
{
	void *slot = &ctx->uc_mcontext.gregs[REG_CSGSFS];

	return &((struct selectors *)slot)->cs;
}
#else
# define REG_IP REG_EIP
# define REG_SP REG_ESP
# define REG_CX REG_ECX

/* Returns a pointer to the REG_SS slot of @ctx. */
static greg_t *ssptr(ucontext_t *ctx)
{
	greg_t *regs = ctx->uc_mcontext.gregs;

	return regs + REG_SS;
}

/* Returns a pointer to the REG_CS slot of @ctx. */
static greg_t *csptr(ucontext_t *ctx)
{
	greg_t *regs = ctx->uc_mcontext.gregs;

	return regs + REG_CS;
}
#endif
341 | ||
4f6c8938 AL |
/*
 * Checks a given selector for its code bitness.
 *
 * Returns 64, 32 or 16 for a usable code segment selector, or -1 if LAR
 * rejects the selector, the descriptor is not a code segment, or the
 * L / D-B bit combination is not one of the recognised ones.
 */
int cs_bitness(unsigned short cs)
{
	uint32_t valid = 0, ar;

	/*
	 * LAR leaves ZF clear when it cannot load the access rights; in that
	 * case skip the store and leave valid at 0.  The store uses an
	 * explicit "movl" because the constraint allows a memory operand, and
	 * an unsized "mov $imm, mem" does not assemble.
	 */
	__asm__ ("lar %[cs], %[ar]\n\t"
		 "jnz 1f\n\t"
		 "movl $1, %[valid]\n\t"
		 "1:"
		 : [ar] "=r" (ar), [valid] "+rm" (valid)
		 : [cs] "r" (cs));

	if (!valid)
		return -1;

	if (!(ar & (1 << 11)))
		return -1;	/* Not code. */

	bool db = (ar & (1 << 22));
	bool l = (ar & (1 << 21));

	if (l && !db)
		return 64;
	if (!l && db)
		return 32;
	if (!l && !db)
		return 16;

	return -1;	/* Unknown bitness. */
}
374 | ||
375 | /* | |
376 | * Checks a given selector for its code bitness or returns -1 if it's not | |
377 | * a usable code segment selector. | |
378 | */ | |
379 | bool is_valid_ss(unsigned short cs) | |
380 | { | |
381 | uint32_t valid = 0, ar; | |
382 | asm ("lar %[cs], %[ar]\n\t" | |
383 | "jnz 1f\n\t" | |
384 | "mov $1, %[valid]\n\t" | |
385 | "1:" | |
386 | : [ar] "=r" (ar), [valid] "+rm" (valid) | |
387 | : [cs] "r" (cs)); | |
388 | ||
389 | if (!valid) | |
390 | return false; | |
391 | ||
392 | if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA && | |
393 | (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN) | |
394 | return false; | |
395 | ||
396 | return (ar & AR_P); | |
397 | } | |
398 | ||
3f705dfd AL |
399 | /* Number of errors in the current test case. */ |
400 | static volatile sig_atomic_t nerrs; | |
401 | ||
4f6c8938 AL |
402 | static void validate_signal_ss(int sig, ucontext_t *ctx) |
403 | { | |
404 | #ifdef __x86_64__ | |
405 | bool was_64bit = (cs_bitness(*csptr(ctx)) == 64); | |
406 | ||
407 | if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) { | |
408 | printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n"); | |
409 | nerrs++; | |
410 | ||
411 | /* | |
412 | * This happens on Linux 4.1. The rest will fail, too, so | |
413 | * return now to reduce the noise. | |
414 | */ | |
415 | return; | |
416 | } | |
417 | ||
418 | /* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */ | |
419 | if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) { | |
420 | printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n", | |
421 | sig); | |
422 | nerrs++; | |
423 | } | |
424 | ||
425 | if (is_valid_ss(*ssptr(ctx))) { | |
426 | /* | |
427 | * DOSEMU was written before 64-bit sigcontext had SS, and | |
428 | * it tries to figure out the signal source SS by looking at | |
429 | * the physical register. Make sure that keeps working. | |
430 | */ | |
431 | unsigned short hw_ss; | |
432 | asm ("mov %%ss, %0" : "=rm" (hw_ss)); | |
433 | if (hw_ss != *ssptr(ctx)) { | |
434 | printf("[FAIL]\tHW SS didn't match saved SS\n"); | |
435 | nerrs++; | |
436 | } | |
437 | } | |
438 | #endif | |
439 | } | |
440 | ||
3f705dfd AL |
441 | /* |
442 | * SIGUSR1 handler. Sets CS and SS as requested and points IP to the | |
443 | * int3 trampoline. Sets SP to a large known value so that we can see | |
444 | * whether the value round-trips back to user mode correctly. | |
445 | */ | |
446 | static void sigusr1(int sig, siginfo_t *info, void *ctx_void) | |
447 | { | |
448 | ucontext_t *ctx = (ucontext_t*)ctx_void; | |
449 | ||
4f6c8938 AL |
450 | validate_signal_ss(sig, ctx); |
451 | ||
3f705dfd AL |
452 | memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); |
453 | ||
454 | *csptr(ctx) = sig_cs; | |
455 | *ssptr(ctx) = sig_ss; | |
456 | ||
457 | ctx->uc_mcontext.gregs[REG_IP] = | |
458 | sig_cs == code16_sel ? 0 : (unsigned long)&int3; | |
459 | ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL; | |
1ef0199a | 460 | ctx->uc_mcontext.gregs[REG_CX] = 0; |
3f705dfd AL |
461 | |
462 | memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); | |
1ef0199a | 463 | requested_regs[REG_CX] = *ssptr(ctx); /* The asm code does this. */ |
3f705dfd AL |
464 | |
465 | return; | |
466 | } | |
467 | ||
468 | /* | |
4f6c8938 AL |
469 | * Called after a successful sigreturn (via int3) or from a failed |
470 | * sigreturn (directly by kernel). Restores our state so that the | |
471 | * original raise(SIGUSR1) returns. | |
3f705dfd AL |
472 | */ |
473 | static void sigtrap(int sig, siginfo_t *info, void *ctx_void) | |
474 | { | |
475 | ucontext_t *ctx = (ucontext_t*)ctx_void; | |
476 | ||
4f6c8938 AL |
477 | validate_signal_ss(sig, ctx); |
478 | ||
3f705dfd AL |
479 | sig_err = ctx->uc_mcontext.gregs[REG_ERR]; |
480 | sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO]; | |
481 | ||
482 | unsigned short ss; | |
483 | asm ("mov %%ss,%0" : "=r" (ss)); | |
484 | ||
1ef0199a | 485 | greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX]; |
3f705dfd AL |
486 | if (asm_ss != sig_ss && sig == SIGTRAP) { |
487 | /* Sanity check failure. */ | |
488 | printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n", | |
489 | ss, *ssptr(ctx), (unsigned long long)asm_ss); | |
490 | nerrs++; | |
491 | } | |
492 | ||
493 | memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); | |
494 | memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t)); | |
495 | ||
4f6c8938 AL |
496 | #ifdef __x86_64__ |
497 | if (sig_corrupt_final_ss) { | |
498 | if (ctx->uc_flags & UC_STRICT_RESTORE_SS) { | |
499 | printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n"); | |
500 | nerrs++; | |
501 | } else { | |
502 | /* | |
503 | * DOSEMU transitions from 32-bit to 64-bit mode by | |
504 | * adjusting sigcontext, and it requires that this work | |
505 | * even if the saved SS is bogus. | |
506 | */ | |
507 | printf("\tCorrupting SS on return to 64-bit mode\n"); | |
508 | *ssptr(ctx) = 0; | |
509 | } | |
510 | } | |
511 | #endif | |
512 | ||
3f705dfd AL |
513 | sig_trapped = sig; |
514 | } | |
515 | ||
4f6c8938 AL |
516 | #ifdef __x86_64__ |
517 | /* Tests recovery if !UC_STRICT_RESTORE_SS */ | |
518 | static void sigusr2(int sig, siginfo_t *info, void *ctx_void) | |
3f705dfd | 519 | { |
4f6c8938 | 520 | ucontext_t *ctx = (ucontext_t*)ctx_void; |
3f705dfd | 521 | |
4f6c8938 AL |
522 | if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) { |
523 | printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n"); | |
524 | nerrs++; | |
525 | return; /* We can't do the rest. */ | |
526 | } | |
3f705dfd | 527 | |
4f6c8938 AL |
528 | ctx->uc_flags &= ~UC_STRICT_RESTORE_SS; |
529 | *ssptr(ctx) = 0; | |
3f705dfd | 530 | |
4f6c8938 AL |
531 | /* Return. The kernel should recover without sending another signal. */ |
532 | } | |
3f705dfd | 533 | |
4f6c8938 AL |
534 | static int test_nonstrict_ss(void) |
535 | { | |
536 | clearhandler(SIGUSR1); | |
537 | clearhandler(SIGTRAP); | |
538 | clearhandler(SIGSEGV); | |
539 | clearhandler(SIGILL); | |
540 | sethandler(SIGUSR2, sigusr2, 0); | |
541 | ||
542 | nerrs = 0; | |
543 | ||
544 | printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n"); | |
545 | raise(SIGUSR2); | |
546 | if (!nerrs) | |
547 | printf("[OK]\tIt worked\n"); | |
548 | ||
549 | return nerrs; | |
3f705dfd | 550 | } |
4f6c8938 | 551 | #endif |
3f705dfd AL |
552 | |
553 | /* Finds a usable code segment of the requested bitness. */ | |
554 | int find_cs(int bitness) | |
555 | { | |
556 | unsigned short my_cs; | |
557 | ||
558 | asm ("mov %%cs,%0" : "=r" (my_cs)); | |
559 | ||
560 | if (cs_bitness(my_cs) == bitness) | |
561 | return my_cs; | |
562 | if (cs_bitness(my_cs + (2 << 3)) == bitness) | |
563 | return my_cs + (2 << 3); | |
564 | if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness) | |
565 | return my_cs - (2 << 3); | |
566 | if (cs_bitness(code16_sel) == bitness) | |
567 | return code16_sel; | |
568 | ||
569 | printf("[WARN]\tCould not find %d-bit CS\n", bitness); | |
570 | return -1; | |
571 | } | |
572 | ||
573 | static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) | |
574 | { | |
575 | int cs = find_cs(cs_bits); | |
576 | if (cs == -1) { | |
577 | printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n", | |
578 | cs_bits, use_16bit_ss ? 16 : 32); | |
579 | return 0; | |
580 | } | |
581 | ||
582 | if (force_ss != -1) { | |
583 | sig_ss = force_ss; | |
584 | } else { | |
585 | if (use_16bit_ss) { | |
586 | if (!data16_sel) { | |
587 | printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n", | |
588 | cs_bits); | |
589 | return 0; | |
590 | } | |
591 | sig_ss = data16_sel; | |
592 | } else { | |
593 | asm volatile ("mov %%ss,%0" : "=r" (sig_ss)); | |
594 | } | |
595 | } | |
596 | ||
597 | sig_cs = cs; | |
598 | ||
599 | printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n", | |
600 | cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss, | |
601 | (sig_ss & 4) ? "" : ", GDT"); | |
602 | ||
603 | raise(SIGUSR1); | |
604 | ||
605 | nerrs = 0; | |
606 | ||
607 | /* | |
608 | * Check that each register had an acceptable value when the | |
609 | * int3 trampoline was invoked. | |
610 | */ | |
611 | for (int i = 0; i < NGREG; i++) { | |
612 | greg_t req = requested_regs[i], res = resulting_regs[i]; | |
613 | if (i == REG_TRAPNO || i == REG_IP) | |
614 | continue; /* don't care */ | |
3f705dfd | 615 | |
ec348020 | 616 | if (i == REG_SP) { |
3f705dfd | 617 | /* |
ec348020 AL |
618 | * If we were using a 16-bit stack segment, then |
619 | * the kernel is a bit stuck: IRET only restores | |
620 | * the low 16 bits of ESP/RSP if SS is 16-bit. | |
621 | * The kernel uses a hack to restore bits 31:16, | |
622 | * but that hack doesn't help with bits 63:32. | |
623 | * On Intel CPUs, bits 63:32 end up zeroed, and, on | |
624 | * AMD CPUs, they leak the high bits of the kernel | |
625 | * espfix64 stack pointer. There's very little that | |
626 | * the kernel can do about it. | |
627 | * | |
628 | * Similarly, if we are returning to a 32-bit context, | |
629 | * the CPU will often lose the high 32 bits of RSP. | |
3f705dfd | 630 | */ |
ec348020 AL |
631 | |
632 | if (res == req) | |
633 | continue; | |
634 | ||
635 | if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) { | |
636 | printf("[NOTE]\tSP: %llx -> %llx\n", | |
637 | (unsigned long long)req, | |
638 | (unsigned long long)res); | |
639 | continue; | |
640 | } | |
641 | ||
642 | printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n", | |
643 | (unsigned long long)requested_regs[i], | |
644 | (unsigned long long)resulting_regs[i]); | |
645 | nerrs++; | |
646 | continue; | |
3f705dfd AL |
647 | } |
648 | ||
649 | bool ignore_reg = false; | |
650 | #if __i386__ | |
651 | if (i == REG_UESP) | |
652 | ignore_reg = true; | |
653 | #else | |
654 | if (i == REG_CSGSFS) { | |
655 | struct selectors *req_sels = | |
656 | (void *)&requested_regs[REG_CSGSFS]; | |
657 | struct selectors *res_sels = | |
658 | (void *)&resulting_regs[REG_CSGSFS]; | |
659 | if (req_sels->cs != res_sels->cs) { | |
660 | printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n", | |
661 | req_sels->cs, res_sels->cs); | |
662 | nerrs++; | |
663 | } | |
664 | ||
665 | if (req_sels->ss != res_sels->ss) { | |
666 | printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n", | |
667 | req_sels->ss, res_sels->ss); | |
668 | nerrs++; | |
669 | } | |
670 | ||
671 | continue; | |
672 | } | |
673 | #endif | |
674 | ||
675 | /* Sanity check on the kernel */ | |
1ef0199a AL |
676 | if (i == REG_CX && requested_regs[i] != resulting_regs[i]) { |
677 | printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n", | |
3f705dfd AL |
678 | (unsigned long long)requested_regs[i], |
679 | (unsigned long long)resulting_regs[i]); | |
680 | nerrs++; | |
681 | continue; | |
682 | } | |
683 | ||
684 | if (requested_regs[i] != resulting_regs[i] && !ignore_reg) { | |
3f705dfd AL |
685 | printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n", |
686 | i, (unsigned long long)requested_regs[i], | |
687 | (unsigned long long)resulting_regs[i]); | |
688 | nerrs++; | |
689 | } | |
690 | } | |
691 | ||
692 | if (nerrs == 0) | |
693 | printf("[OK]\tall registers okay\n"); | |
694 | ||
695 | return nerrs; | |
696 | } | |
697 | ||
698 | static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs) | |
699 | { | |
700 | int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs; | |
701 | if (cs == -1) | |
702 | return 0; | |
703 | ||
704 | sig_cs = cs; | |
705 | sig_ss = ss; | |
706 | ||
707 | printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n", | |
708 | cs_bits, sig_cs, sig_ss); | |
709 | ||
710 | sig_trapped = 0; | |
711 | raise(SIGUSR1); | |
712 | if (sig_trapped) { | |
713 | char errdesc[32] = ""; | |
714 | if (sig_err) { | |
715 | const char *src = (sig_err & 1) ? " EXT" : ""; | |
716 | const char *table; | |
717 | if ((sig_err & 0x6) == 0x0) | |
718 | table = "GDT"; | |
719 | else if ((sig_err & 0x6) == 0x4) | |
720 | table = "LDT"; | |
721 | else if ((sig_err & 0x6) == 0x2) | |
722 | table = "IDT"; | |
723 | else | |
724 | table = "???"; | |
725 | ||
726 | sprintf(errdesc, "%s%s index %d, ", | |
727 | table, src, sig_err >> 3); | |
728 | } | |
729 | ||
730 | char trapname[32]; | |
731 | if (sig_trapno == 13) | |
732 | strcpy(trapname, "GP"); | |
733 | else if (sig_trapno == 11) | |
734 | strcpy(trapname, "NP"); | |
735 | else if (sig_trapno == 12) | |
736 | strcpy(trapname, "SS"); | |
737 | else if (sig_trapno == 32) | |
738 | strcpy(trapname, "IRET"); /* X86_TRAP_IRET */ | |
739 | else | |
740 | sprintf(trapname, "%d", sig_trapno); | |
741 | ||
742 | printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n", | |
743 | trapname, (unsigned long)sig_err, | |
744 | errdesc, strsignal(sig_trapped)); | |
745 | return 0; | |
746 | } else { | |
4f6c8938 AL |
747 | /* |
748 | * This also implicitly tests UC_STRICT_RESTORE_SS: | |
749 | * We check that these signals set UC_STRICT_RESTORE_SS and, | |
750 | * if UC_STRICT_RESTORE_SS doesn't cause strict behavior, | |
751 | * then we won't get SIGSEGV. | |
752 | */ | |
3f705dfd AL |
753 | printf("[FAIL]\tDid not get SIGSEGV\n"); |
754 | return 1; | |
755 | } | |
756 | } | |
757 | ||
758 | int main() | |
759 | { | |
760 | int total_nerrs = 0; | |
761 | unsigned short my_cs, my_ss; | |
762 | ||
763 | asm volatile ("mov %%cs,%0" : "=r" (my_cs)); | |
764 | asm volatile ("mov %%ss,%0" : "=r" (my_ss)); | |
765 | setup_ldt(); | |
766 | ||
767 | stack_t stack = { | |
768 | .ss_sp = altstack_data, | |
769 | .ss_size = SIGSTKSZ, | |
770 | }; | |
771 | if (sigaltstack(&stack, NULL) != 0) | |
772 | err(1, "sigaltstack"); | |
773 | ||
774 | sethandler(SIGUSR1, sigusr1, 0); | |
775 | sethandler(SIGTRAP, sigtrap, SA_ONSTACK); | |
776 | ||
777 | /* Easy cases: return to a 32-bit SS in each possible CS bitness. */ | |
778 | total_nerrs += test_valid_sigreturn(64, false, -1); | |
779 | total_nerrs += test_valid_sigreturn(32, false, -1); | |
780 | total_nerrs += test_valid_sigreturn(16, false, -1); | |
781 | ||
782 | /* | |
783 | * Test easy espfix cases: return to a 16-bit LDT SS in each possible | |
784 | * CS bitness. NB: with a long mode CS, the SS bitness is irrelevant. | |
785 | * | |
786 | * This catches the original missing-espfix-on-64-bit-kernels issue | |
787 | * as well as CVE-2014-8134. | |
788 | */ | |
789 | total_nerrs += test_valid_sigreturn(64, true, -1); | |
790 | total_nerrs += test_valid_sigreturn(32, true, -1); | |
791 | total_nerrs += test_valid_sigreturn(16, true, -1); | |
792 | ||
793 | if (gdt_data16_idx) { | |
794 | /* | |
795 | * For performance reasons, Linux skips espfix if SS points | |
796 | * to the GDT. If we were able to allocate a 16-bit SS in | |
797 | * the GDT, see if it leaks parts of the kernel stack pointer. | |
798 | * | |
799 | * This tests for CVE-2014-8133. | |
800 | */ | |
801 | total_nerrs += test_valid_sigreturn(64, true, | |
802 | GDT3(gdt_data16_idx)); | |
803 | total_nerrs += test_valid_sigreturn(32, true, | |
804 | GDT3(gdt_data16_idx)); | |
805 | total_nerrs += test_valid_sigreturn(16, true, | |
806 | GDT3(gdt_data16_idx)); | |
807 | } | |
808 | ||
4f6c8938 AL |
809 | #ifdef __x86_64__ |
810 | /* Nasty ABI case: check SS corruption handling. */ | |
811 | sig_corrupt_final_ss = 1; | |
812 | total_nerrs += test_valid_sigreturn(32, false, -1); | |
813 | total_nerrs += test_valid_sigreturn(32, true, -1); | |
814 | sig_corrupt_final_ss = 0; | |
815 | #endif | |
816 | ||
3f705dfd AL |
817 | /* |
818 | * We're done testing valid sigreturn cases. Now we test states | |
819 | * for which sigreturn itself will succeed but the subsequent | |
820 | * entry to user mode will fail. | |
821 | * | |
822 | * Depending on the failure mode and the kernel bitness, these | |
823 | * entry failures can generate SIGSEGV, SIGBUS, or SIGILL. | |
824 | */ | |
825 | clearhandler(SIGTRAP); | |
826 | sethandler(SIGSEGV, sigtrap, SA_ONSTACK); | |
827 | sethandler(SIGBUS, sigtrap, SA_ONSTACK); | |
828 | sethandler(SIGILL, sigtrap, SA_ONSTACK); /* 32-bit kernels do this */ | |
829 | ||
830 | /* Easy failures: invalid SS, resulting in #GP(0) */ | |
831 | test_bad_iret(64, ldt_nonexistent_sel, -1); | |
832 | test_bad_iret(32, ldt_nonexistent_sel, -1); | |
833 | test_bad_iret(16, ldt_nonexistent_sel, -1); | |
834 | ||
835 | /* These fail because SS isn't a data segment, resulting in #GP(SS) */ | |
836 | test_bad_iret(64, my_cs, -1); | |
837 | test_bad_iret(32, my_cs, -1); | |
838 | test_bad_iret(16, my_cs, -1); | |
839 | ||
840 | /* Try to return to a not-present code segment, triggering #NP(SS). */ | |
841 | test_bad_iret(32, my_ss, npcode32_sel); | |
842 | ||
843 | /* | |
844 | * Try to return to a not-present but otherwise valid data segment. | |
845 | * This will cause IRET to fail with #SS on the espfix stack. This | |
846 | * exercises CVE-2014-9322. | |
847 | * | |
848 | * Note that, if espfix is enabled, 64-bit Linux will lose track | |
849 | * of the actual cause of failure and report #GP(0) instead. | |
850 | * This would be very difficult for Linux to avoid, because | |
851 | * espfix64 causes IRET failures to be promoted to #DF, so the | |
852 | * original exception frame is never pushed onto the stack. | |
853 | */ | |
854 | test_bad_iret(32, npdata32_sel, -1); | |
855 | ||
856 | /* | |
857 | * Try to return to a not-present but otherwise valid data | |
858 | * segment without invoking espfix. Newer kernels don't allow | |
859 | * this to happen in the first place. On older kernels, though, | |
860 | * this can trigger CVE-2014-9322. | |
861 | */ | |
862 | if (gdt_npdata32_idx) | |
863 | test_bad_iret(32, GDT3(gdt_npdata32_idx), -1); | |
864 | ||
4f6c8938 AL |
865 | #ifdef __x86_64__ |
866 | total_nerrs += test_nonstrict_ss(); | |
867 | #endif | |
868 | ||
3f705dfd AL |
869 | return total_nerrs ? 1 : 0; |
870 | } |