Commit | Line | Data |
---|---|---|
1e5db223 | 1 | /* |
9774a96f BM |
2 | * umip.c Emulation for instructions protected by the User-Mode Instruction |
3 | * Prevention feature | |
1e5db223 RN |
4 | * |
5 | * Copyright (c) 2017, Intel Corporation. | |
6 | * Ricardo Neri <ricardo.neri-calderon@linux.intel.com> | |
7 | */ | |
8 | ||
9 | #include <linux/uaccess.h> | |
10 | #include <asm/umip.h> | |
11 | #include <asm/traps.h> | |
12 | #include <asm/insn.h> | |
13 | #include <asm/insn-eval.h> | |
c6a960bb RN |
14 | #include <linux/ratelimit.h> |
15 | ||
16 | #undef pr_fmt | |
17 | #define pr_fmt(fmt) "umip: " fmt | |
1e5db223 RN |
18 | |
19 | /** DOC: Emulation for User-Mode Instruction Prevention (UMIP) | |
20 | * | |
9774a96f BM |
21 | * User-Mode Instruction Prevention is a security feature present in recent |
22 | * x86 processors that, when enabled, prevents a group of instructions (SGDT, | |
23 | * SIDT, SLDT, SMSW and STR) from being run in user mode by issuing a general | |
24 | * protection fault if the instruction is executed with CPL > 0. | |
1e5db223 RN |
25 | * |
26 | * Rather than relaying to the user space the general protection fault caused by | |
27 | * the UMIP-protected instructions (in the form of a SIGSEGV signal), it can be | |
28 | * trapped and the result of such instructions emulated to provide dummy values. | |
29 | * This allows to both conserve the current kernel behavior and not reveal the | |
30 | * system resources that UMIP intends to protect (i.e., the locations of the | |
31 | * global descriptor and interrupt descriptor tables, the segment selectors of | |
32 | * the local descriptor table, the value of the task state register and the | |
33 | * contents of the CR0 register). | |
34 | * | |
35 | * This emulation is needed because certain applications (e.g., WineHQ and | |
36 | * DOSEMU2) rely on this subset of instructions to function. | |
37 | * | |
38 | * The instructions protected by UMIP can be split in two groups. Those which | |
e86c2c8b BS |
39 | * return a kernel memory address (SGDT and SIDT) and those which return a |
40 | * value (SLDT, STR and SMSW). | |
1e5db223 RN |
41 | * |
42 | * For the instructions that return a kernel memory address, applications | |
43 | * such as WineHQ rely on the result being located in the kernel memory space, | |
44 | * not the actual location of the table. The result is emulated as a hard-coded | |
45 | * value that lies close to the top of the kernel memory. The limit for the GDT | |
46 | * and the IDT are set to zero. | |
47 | * | |
b91e7089 | 48 | * The instruction SMSW is emulated to return the value that the register CR0 |
1e5db223 | 49 | * has at boot time as set in the head_32. |
b91e7089 BS |
50 | * SLDT and STR are emulated to return the values that the kernel programmatically |
51 | * assigns: | |
52 | * - SLDT returns (GDT_ENTRY_LDT * 8) if an LDT has been set, 0 if not. | |
53 | * - STR returns (GDT_ENTRY_TSS * 8). | |
1e5db223 | 54 | * |
e86c2c8b | 55 | * Emulation is provided for both 32-bit and 64-bit processes. |
1e5db223 RN |
56 | * |
57 | * Care is taken to appropriately emulate the results when segmentation is | |
58 | * used. That is, rather than relying on USER_DS and USER_CS, the function | |
59 | * insn_get_addr_ref() inspects the segment descriptor pointed by the | |
60 | * registers in pt_regs. This ensures that we correctly obtain the segment | |
61 | * base address and the address and operand sizes even if the user space | |
62 | * application uses a local descriptor table. | |
63 | */ | |
64 | ||
e86c2c8b BS |
65 | #define UMIP_DUMMY_GDT_BASE 0xfffffffffffe0000ULL |
66 | #define UMIP_DUMMY_IDT_BASE 0xffffffffffff0000ULL | |
1e5db223 RN |
67 | |
68 | /* | |
69 | * The SGDT and SIDT instructions store the contents of the global descriptor | |
70 | * table and interrupt table registers, respectively. The destination is a | |
71 | * memory operand of X+2 bytes. X bytes are used to store the base address of | |
e86c2c8b BS |
72 | * the table and 2 bytes are used to store the limit. In 32-bit processes X |
73 | * has a value of 4, in 64-bit processes X has a value of 8. | |
1e5db223 | 74 | */ |
e86c2c8b BS |
75 | #define UMIP_GDT_IDT_BASE_SIZE_64BIT 8 |
76 | #define UMIP_GDT_IDT_BASE_SIZE_32BIT 4 | |
1e5db223 RN |
77 | #define UMIP_GDT_IDT_LIMIT_SIZE 2 |
78 | ||
79 | #define UMIP_INST_SGDT 0 /* 0F 01 /0 */ | |
80 | #define UMIP_INST_SIDT 1 /* 0F 01 /1 */ | |
6e2a3064 RN |
81 | #define UMIP_INST_SMSW 2 /* 0F 01 /4 */ |
82 | #define UMIP_INST_SLDT 3 /* 0F 00 /0 */ | |
83 | #define UMIP_INST_STR 4 /* 0F 00 /1 */ | |
1e5db223 | 84 | |
b0e387c3 | 85 | static const char * const umip_insns[5] = { |
fd11a649 RN |
86 | [UMIP_INST_SGDT] = "SGDT", |
87 | [UMIP_INST_SIDT] = "SIDT", | |
88 | [UMIP_INST_SMSW] = "SMSW", | |
89 | [UMIP_INST_SLDT] = "SLDT", | |
90 | [UMIP_INST_STR] = "STR", | |
91 | }; | |
92 | ||
/*
 * Logging shorthands: forward to umip_printk() with the KERN_ERR or
 * KERN_WARNING log level, respectively.
 */
#define umip_pr_err(regs, fmt, ...)					\
	umip_printk((regs), KERN_ERR, fmt, ##__VA_ARGS__)
#define umip_pr_warn(regs, fmt, ...)					\
	umip_printk((regs), KERN_WARNING, fmt, ##__VA_ARGS__)
97 | ||
98 | /** | |
99 | * umip_printk() - Print a rate-limited message | |
100 | * @regs: Register set with the context in which the warning is printed | |
101 | * @log_level: Kernel log level to print the message | |
102 | * @fmt: The text string to print | |
103 | * | |
104 | * Print the text contained in @fmt. The print rate is limited to bursts of 5 | |
105 | * messages every two minutes. The purpose of this customized version of | |
106 | * printk() is to print messages when user space processes use any of the | |
107 | * UMIP-protected instructions. Thus, the printed text is prepended with the | |
108 | * task name and process ID number of the current task as well as the | |
109 | * instruction and stack pointers in @regs as seen when entering kernel mode. | |
110 | * | |
111 | * Returns: | |
112 | * | |
113 | * None. | |
114 | */ | |
115 | static __printf(3, 4) | |
116 | void umip_printk(const struct pt_regs *regs, const char *log_level, | |
117 | const char *fmt, ...) | |
118 | { | |
119 | /* Bursts of 5 messages every two minutes */ | |
120 | static DEFINE_RATELIMIT_STATE(ratelimit, 2 * 60 * HZ, 5); | |
121 | struct task_struct *tsk = current; | |
122 | struct va_format vaf; | |
123 | va_list args; | |
124 | ||
125 | if (!__ratelimit(&ratelimit)) | |
126 | return; | |
127 | ||
128 | va_start(args, fmt); | |
129 | vaf.fmt = fmt; | |
130 | vaf.va = &args; | |
131 | printk("%s" pr_fmt("%s[%d] ip:%lx sp:%lx: %pV"), log_level, tsk->comm, | |
132 | task_pid_nr(tsk), regs->ip, regs->sp, &vaf); | |
133 | va_end(args); | |
134 | } | |
135 | ||
1e5db223 RN |
136 | /** |
137 | * identify_insn() - Identify a UMIP-protected instruction | |
138 | * @insn: Instruction structure with opcode and ModRM byte. | |
139 | * | |
140 | * From the opcode and ModRM.reg in @insn identify, if any, a UMIP-protected | |
141 | * instruction that can be emulated. | |
142 | * | |
143 | * Returns: | |
144 | * | |
145 | * On success, a constant identifying a specific UMIP-protected instruction that | |
146 | * can be emulated. | |
147 | * | |
148 | * -EINVAL on error or when not an UMIP-protected instruction that can be | |
149 | * emulated. | |
150 | */ | |
151 | static int identify_insn(struct insn *insn) | |
152 | { | |
153 | /* By getting modrm we also get the opcode. */ | |
154 | insn_get_modrm(insn); | |
155 | ||
156 | if (!insn->modrm.nbytes) | |
157 | return -EINVAL; | |
158 | ||
159 | /* All the instructions of interest start with 0x0f. */ | |
160 | if (insn->opcode.bytes[0] != 0xf) | |
161 | return -EINVAL; | |
162 | ||
163 | if (insn->opcode.bytes[1] == 0x1) { | |
164 | switch (X86_MODRM_REG(insn->modrm.value)) { | |
165 | case 0: | |
166 | return UMIP_INST_SGDT; | |
167 | case 1: | |
168 | return UMIP_INST_SIDT; | |
169 | case 4: | |
170 | return UMIP_INST_SMSW; | |
171 | default: | |
172 | return -EINVAL; | |
173 | } | |
6e2a3064 RN |
174 | } else if (insn->opcode.bytes[1] == 0x0) { |
175 | if (X86_MODRM_REG(insn->modrm.value) == 0) | |
176 | return UMIP_INST_SLDT; | |
177 | else if (X86_MODRM_REG(insn->modrm.value) == 1) | |
178 | return UMIP_INST_STR; | |
179 | else | |
180 | return -EINVAL; | |
181 | } else { | |
182 | return -EINVAL; | |
1e5db223 | 183 | } |
1e5db223 RN |
184 | } |
185 | ||
186 | /** | |
187 | * emulate_umip_insn() - Emulate UMIP instructions and return dummy values | |
188 | * @insn: Instruction structure with operands | |
189 | * @umip_inst: A constant indicating the instruction to emulate | |
190 | * @data: Buffer into which the dummy result is stored | |
191 | * @data_size: Size of the emulated result | |
e86c2c8b | 192 | * @x86_64: true if process is 64-bit, false otherwise |
1e5db223 RN |
193 | * |
194 | * Emulate an instruction protected by UMIP and provide a dummy result. The | |
195 | * result of the emulation is saved in @data. The size of the results depends | |
196 | * on both the instruction and type of operand (register vs memory address). | |
197 | * The size of the result is updated in @data_size. Caller is responsible | |
198 | * of providing a @data buffer of at least UMIP_GDT_IDT_BASE_SIZE + | |
199 | * UMIP_GDT_IDT_LIMIT_SIZE bytes. | |
200 | * | |
201 | * Returns: | |
202 | * | |
203 | * 0 on success, -EINVAL on error while emulating. | |
204 | */ | |
205 | static int emulate_umip_insn(struct insn *insn, int umip_inst, | |
e86c2c8b | 206 | unsigned char *data, int *data_size, bool x86_64) |
1e5db223 | 207 | { |
1e5db223 RN |
208 | if (!data || !data_size || !insn) |
209 | return -EINVAL; | |
210 | /* | |
211 | * These two instructions return the base address and limit of the | |
212 | * global and interrupt descriptor table, respectively. According to the | |
213 | * Intel Software Development manual, the base address can be 24-bit, | |
214 | * 32-bit or 64-bit. Limit is always 16-bit. If the operand size is | |
215 | * 16-bit, the returned value of the base address is supposed to be a | |
216 | * zero-extended 24-byte number. However, it seems that a 32-byte number | |
217 | * is always returned irrespective of the operand size. | |
218 | */ | |
219 | if (umip_inst == UMIP_INST_SGDT || umip_inst == UMIP_INST_SIDT) { | |
e86c2c8b BS |
220 | u64 dummy_base_addr; |
221 | u16 dummy_limit = 0; | |
222 | ||
1e5db223 RN |
223 | /* SGDT and SIDT do not use registers operands. */ |
224 | if (X86_MODRM_MOD(insn->modrm.value) == 3) | |
225 | return -EINVAL; | |
226 | ||
227 | if (umip_inst == UMIP_INST_SGDT) | |
228 | dummy_base_addr = UMIP_DUMMY_GDT_BASE; | |
229 | else | |
230 | dummy_base_addr = UMIP_DUMMY_IDT_BASE; | |
231 | ||
e86c2c8b BS |
232 | /* |
233 | * 64-bit processes use the entire dummy base address. | |
234 | * 32-bit processes use the lower 32 bits of the base address. | |
235 | * dummy_base_addr is always 64 bits, but we memcpy the correct | |
236 | * number of bytes from it to the destination. | |
237 | */ | |
238 | if (x86_64) | |
239 | *data_size = UMIP_GDT_IDT_BASE_SIZE_64BIT; | |
240 | else | |
241 | *data_size = UMIP_GDT_IDT_BASE_SIZE_32BIT; | |
242 | ||
243 | memcpy(data + 2, &dummy_base_addr, *data_size); | |
1e5db223 | 244 | |
e86c2c8b | 245 | *data_size += UMIP_GDT_IDT_LIMIT_SIZE; |
1e5db223 RN |
246 | memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE); |
247 | ||
b91e7089 BS |
248 | } else if (umip_inst == UMIP_INST_SMSW || umip_inst == UMIP_INST_SLDT || |
249 | umip_inst == UMIP_INST_STR) { | |
250 | unsigned long dummy_value; | |
251 | ||
252 | if (umip_inst == UMIP_INST_SMSW) { | |
253 | dummy_value = CR0_STATE; | |
254 | } else if (umip_inst == UMIP_INST_STR) { | |
255 | dummy_value = GDT_ENTRY_TSS * 8; | |
256 | } else if (umip_inst == UMIP_INST_SLDT) { | |
257 | #ifdef CONFIG_MODIFY_LDT_SYSCALL | |
258 | down_read(¤t->mm->context.ldt_usr_sem); | |
259 | if (current->mm->context.ldt) | |
260 | dummy_value = GDT_ENTRY_LDT * 8; | |
261 | else | |
262 | dummy_value = 0; | |
263 | up_read(¤t->mm->context.ldt_usr_sem); | |
264 | #else | |
265 | dummy_value = 0; | |
266 | #endif | |
267 | } | |
1e5db223 RN |
268 | |
269 | /* | |
b91e7089 | 270 | * For these 3 instructions, the number |
1e5db223 RN |
271 | * of bytes to be copied in the result buffer is determined |
272 | * by whether the operand is a register or a memory location. | |
273 | * If operand is a register, return as many bytes as the operand | |
274 | * size. If operand is memory, return only the two least | |
b91e7089 | 275 | * siginificant bytes. |
1e5db223 RN |
276 | */ |
277 | if (X86_MODRM_MOD(insn->modrm.value) == 3) | |
278 | *data_size = insn->opnd_bytes; | |
279 | else | |
280 | *data_size = 2; | |
281 | ||
282 | memcpy(data, &dummy_value, *data_size); | |
1e5db223 RN |
283 | } else { |
284 | return -EINVAL; | |
285 | } | |
286 | ||
287 | return 0; | |
288 | } | |
289 | ||
c6a960bb RN |
290 | /** |
291 | * force_sig_info_umip_fault() - Force a SIGSEGV with SEGV_MAPERR | |
292 | * @addr: Address that caused the signal | |
293 | * @regs: Register set containing the instruction pointer | |
294 | * | |
295 | * Force a SIGSEGV signal with SEGV_MAPERR as the error code. This function is | |
296 | * intended to be used to provide a segmentation fault when the result of the | |
297 | * UMIP emulation could not be copied to the user space memory. | |
298 | * | |
299 | * Returns: none | |
300 | */ | |
301 | static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs) | |
302 | { | |
c6a960bb RN |
303 | struct task_struct *tsk = current; |
304 | ||
305 | tsk->thread.cr2 = (unsigned long)addr; | |
306 | tsk->thread.error_code = X86_PF_USER | X86_PF_WRITE; | |
307 | tsk->thread.trap_nr = X86_TRAP_PF; | |
308 | ||
2e1661d2 | 309 | force_sig_fault(SIGSEGV, SEGV_MAPERR, addr); |
c6a960bb RN |
310 | |
311 | if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV))) | |
312 | return; | |
313 | ||
fd11a649 RN |
314 | umip_pr_err(regs, "segfault in emulation. error%x\n", |
315 | X86_PF_USER | X86_PF_WRITE); | |
c6a960bb RN |
316 | } |
317 | ||
1e5db223 RN |
318 | /** |
319 | * fixup_umip_exception() - Fixup a general protection fault caused by UMIP | |
320 | * @regs: Registers as saved when entering the #GP handler | |
321 | * | |
e86c2c8b BS |
322 | * The instructions SGDT, SIDT, STR, SMSW and SLDT cause a general protection |
323 | * fault if executed with CPL > 0 (i.e., from user space). This function fixes | |
324 | * the exception up and provides dummy results for SGDT, SIDT and SMSW; STR | |
325 | * and SLDT are not fixed up. | |
1e5db223 RN |
326 | * |
327 | * If operands are memory addresses, results are copied to user-space memory as | |
328 | * indicated by the instruction pointed by eIP using the registers indicated in | |
329 | * the instruction operands. If operands are registers, results are copied into | |
330 | * the context that was saved when entering kernel mode. | |
331 | * | |
332 | * Returns: | |
333 | * | |
334 | * True if emulation was successful; false if not. | |
335 | */ | |
336 | bool fixup_umip_exception(struct pt_regs *regs) | |
337 | { | |
338 | int not_copied, nr_copied, reg_offset, dummy_data_size, umip_inst; | |
339 | unsigned long seg_base = 0, *reg_addr; | |
340 | /* 10 bytes is the maximum size of the result of UMIP instructions */ | |
341 | unsigned char dummy_data[10] = { 0 }; | |
342 | unsigned char buf[MAX_INSN_SIZE]; | |
343 | void __user *uaddr; | |
344 | struct insn insn; | |
e2a5dca7 | 345 | int seg_defs; |
1e5db223 RN |
346 | |
347 | if (!regs) | |
348 | return false; | |
349 | ||
1e5db223 RN |
350 | /* |
351 | * If not in user-space long mode, a custom code segment could be in | |
352 | * use. This is true in protected mode (if the process defined a local | |
353 | * descriptor table), or virtual-8086 mode. In most of the cases | |
354 | * seg_base will be zero as in USER_CS. | |
355 | */ | |
356 | if (!user_64bit_mode(regs)) | |
357 | seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS); | |
358 | ||
359 | if (seg_base == -1L) | |
360 | return false; | |
361 | ||
362 | not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip), | |
363 | sizeof(buf)); | |
364 | nr_copied = sizeof(buf) - not_copied; | |
365 | ||
366 | /* | |
367 | * The copy_from_user above could have failed if user code is protected | |
368 | * by a memory protection key. Give up on emulation in such a case. | |
369 | * Should we issue a page fault? | |
370 | */ | |
371 | if (!nr_copied) | |
372 | return false; | |
373 | ||
374 | insn_init(&insn, buf, nr_copied, user_64bit_mode(regs)); | |
375 | ||
376 | /* | |
377 | * Override the default operand and address sizes with what is specified | |
378 | * in the code segment descriptor. The instruction decoder only sets | |
379 | * the address size it to either 4 or 8 address bytes and does nothing | |
380 | * for the operand bytes. This OK for most of the cases, but we could | |
381 | * have special cases where, for instance, a 16-bit code segment | |
382 | * descriptor is used. | |
383 | * If there is an address override prefix, the instruction decoder | |
384 | * correctly updates these values, even for 16-bit defaults. | |
385 | */ | |
386 | seg_defs = insn_get_code_seg_params(regs); | |
387 | if (seg_defs == -EINVAL) | |
388 | return false; | |
389 | ||
390 | insn.addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs); | |
391 | insn.opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs); | |
392 | ||
393 | insn_get_length(&insn); | |
394 | if (nr_copied < insn.length) | |
395 | return false; | |
396 | ||
397 | umip_inst = identify_insn(&insn); | |
398 | if (umip_inst < 0) | |
399 | return false; | |
400 | ||
8d3bcc44 | 401 | umip_pr_warn(regs, "%s instruction cannot be used by applications.\n", |
fd11a649 RN |
402 | umip_insns[umip_inst]); |
403 | ||
8d3bcc44 | 404 | umip_pr_warn(regs, "For now, expensive software emulation returns the result.\n"); |
fd11a649 | 405 | |
e86c2c8b BS |
406 | if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size, |
407 | user_64bit_mode(regs))) | |
1e5db223 RN |
408 | return false; |
409 | ||
410 | /* | |
411 | * If operand is a register, write result to the copy of the register | |
412 | * value that was pushed to the stack when entering into kernel mode. | |
413 | * Upon exit, the value we write will be restored to the actual hardware | |
414 | * register. | |
415 | */ | |
416 | if (X86_MODRM_MOD(insn.modrm.value) == 3) { | |
417 | reg_offset = insn_get_modrm_rm_off(&insn, regs); | |
418 | ||
419 | /* | |
420 | * Negative values are usually errors. In memory addressing, | |
421 | * the exception is -EDOM. Since we expect a register operand, | |
422 | * all negative values are errors. | |
423 | */ | |
424 | if (reg_offset < 0) | |
425 | return false; | |
426 | ||
427 | reg_addr = (unsigned long *)((unsigned long)regs + reg_offset); | |
428 | memcpy(reg_addr, dummy_data, dummy_data_size); | |
429 | } else { | |
430 | uaddr = insn_get_addr_ref(&insn, regs); | |
431 | if ((unsigned long)uaddr == -1L) | |
432 | return false; | |
433 | ||
434 | nr_copied = copy_to_user(uaddr, dummy_data, dummy_data_size); | |
c6a960bb RN |
435 | if (nr_copied > 0) { |
436 | /* | |
437 | * If copy fails, send a signal and tell caller that | |
438 | * fault was fixed up. | |
439 | */ | |
440 | force_sig_info_umip_fault(uaddr, regs); | |
441 | return true; | |
442 | } | |
1e5db223 RN |
443 | } |
444 | ||
445 | /* increase IP to let the program keep going */ | |
446 | regs->ip += insn.length; | |
447 | return true; | |
448 | } |