Commit | Line | Data |
---|---|---|
57319d80 QR |
1 | /* |
2 | * mpx.c - Memory Protection eXtensions | |
3 | * | |
4 | * Copyright (c) 2014, Intel Corporation. | |
5 | * Qiaowei Ren <qiaowei.ren@intel.com> | |
6 | * Dave Hansen <dave.hansen@intel.com> | |
7 | */ | |
8 | #include <linux/kernel.h> | |
fcc7ffd6 | 9 | #include <linux/slab.h> |
57319d80 QR |
10 | #include <linux/syscalls.h> |
11 | #include <linux/sched/sysctl.h> | |
12 | ||
13 | #include <asm/mman.h> | |
14 | #include <asm/mpx.h> | |
15 | ||
/*
 * ->name callback wired into mpx_vma_ops.  Returns the fixed label
 * "[mpx]"; the vma argument is not examined.
 */
static const char *mpx_mapping_name(struct vm_area_struct *vma)
{
	static const char mpx_label[] = "[mpx]";

	return mpx_label;
}
20 | ||
21 | static struct vm_operations_struct mpx_vma_ops = { | |
22 | .name = mpx_mapping_name, | |
23 | }; | |
24 | ||
25 | /* | |
26 | * This is really a simplified "vm_mmap". it only handles MPX | |
27 | * bounds tables (the bounds directory is user-allocated). | |
28 | * | |
29 | * Later on, we use the vma->vm_ops to uniquely identify these | |
30 | * VMAs. | |
31 | */ | |
32 | static unsigned long mpx_mmap(unsigned long len) | |
33 | { | |
34 | unsigned long ret; | |
35 | unsigned long addr, pgoff; | |
36 | struct mm_struct *mm = current->mm; | |
37 | vm_flags_t vm_flags; | |
38 | struct vm_area_struct *vma; | |
39 | ||
40 | /* Only bounds table and bounds directory can be allocated here */ | |
41 | if (len != MPX_BD_SIZE_BYTES && len != MPX_BT_SIZE_BYTES) | |
42 | return -EINVAL; | |
43 | ||
44 | down_write(&mm->mmap_sem); | |
45 | ||
46 | /* Too many mappings? */ | |
47 | if (mm->map_count > sysctl_max_map_count) { | |
48 | ret = -ENOMEM; | |
49 | goto out; | |
50 | } | |
51 | ||
52 | /* Obtain the address to map to. we verify (or select) it and ensure | |
53 | * that it represents a valid section of the address space. | |
54 | */ | |
55 | addr = get_unmapped_area(NULL, 0, len, 0, MAP_ANONYMOUS | MAP_PRIVATE); | |
56 | if (addr & ~PAGE_MASK) { | |
57 | ret = addr; | |
58 | goto out; | |
59 | } | |
60 | ||
61 | vm_flags = VM_READ | VM_WRITE | VM_MPX | | |
62 | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; | |
63 | ||
64 | /* Set pgoff according to addr for anon_vma */ | |
65 | pgoff = addr >> PAGE_SHIFT; | |
66 | ||
67 | ret = mmap_region(NULL, addr, len, vm_flags, pgoff); | |
68 | if (IS_ERR_VALUE(ret)) | |
69 | goto out; | |
70 | ||
71 | vma = find_vma(mm, ret); | |
72 | if (!vma) { | |
73 | ret = -ENOMEM; | |
74 | goto out; | |
75 | } | |
76 | vma->vm_ops = &mpx_vma_ops; | |
77 | ||
78 | if (vm_flags & VM_LOCKED) { | |
79 | up_write(&mm->mmap_sem); | |
80 | mm_populate(ret, len); | |
81 | return ret; | |
82 | } | |
83 | ||
84 | out: | |
85 | up_write(&mm->mmap_sem); | |
86 | return ret; | |
87 | } | |
fcc7ffd6 DH |
88 | |
/* Selects which encoded register field get_reg_offset() decodes. */
enum reg_type {
	REG_TYPE_RM	= 0,	/* ModRM r/m field (extended by REX.B) */
	REG_TYPE_INDEX	= 1,	/* SIB index field (extended by REX.X) */
	REG_TYPE_BASE	= 2,	/* SIB base field  (extended by REX.B) */
};
94 | ||
95 | static unsigned long get_reg_offset(struct insn *insn, struct pt_regs *regs, | |
96 | enum reg_type type) | |
97 | { | |
98 | int regno = 0; | |
99 | ||
100 | static const int regoff[] = { | |
101 | offsetof(struct pt_regs, ax), | |
102 | offsetof(struct pt_regs, cx), | |
103 | offsetof(struct pt_regs, dx), | |
104 | offsetof(struct pt_regs, bx), | |
105 | offsetof(struct pt_regs, sp), | |
106 | offsetof(struct pt_regs, bp), | |
107 | offsetof(struct pt_regs, si), | |
108 | offsetof(struct pt_regs, di), | |
109 | #ifdef CONFIG_X86_64 | |
110 | offsetof(struct pt_regs, r8), | |
111 | offsetof(struct pt_regs, r9), | |
112 | offsetof(struct pt_regs, r10), | |
113 | offsetof(struct pt_regs, r11), | |
114 | offsetof(struct pt_regs, r12), | |
115 | offsetof(struct pt_regs, r13), | |
116 | offsetof(struct pt_regs, r14), | |
117 | offsetof(struct pt_regs, r15), | |
118 | #endif | |
119 | }; | |
120 | int nr_registers = ARRAY_SIZE(regoff); | |
121 | /* | |
122 | * Don't possibly decode a 32-bit instructions as | |
123 | * reading a 64-bit-only register. | |
124 | */ | |
125 | if (IS_ENABLED(CONFIG_X86_64) && !insn->x86_64) | |
126 | nr_registers -= 8; | |
127 | ||
128 | switch (type) { | |
129 | case REG_TYPE_RM: | |
130 | regno = X86_MODRM_RM(insn->modrm.value); | |
131 | if (X86_REX_B(insn->rex_prefix.value) == 1) | |
132 | regno += 8; | |
133 | break; | |
134 | ||
135 | case REG_TYPE_INDEX: | |
136 | regno = X86_SIB_INDEX(insn->sib.value); | |
137 | if (X86_REX_X(insn->rex_prefix.value) == 1) | |
138 | regno += 8; | |
139 | break; | |
140 | ||
141 | case REG_TYPE_BASE: | |
142 | regno = X86_SIB_BASE(insn->sib.value); | |
143 | if (X86_REX_B(insn->rex_prefix.value) == 1) | |
144 | regno += 8; | |
145 | break; | |
146 | ||
147 | default: | |
148 | pr_err("invalid register type"); | |
149 | BUG(); | |
150 | break; | |
151 | } | |
152 | ||
153 | if (regno > nr_registers) { | |
154 | WARN_ONCE(1, "decoded an instruction with an invalid register"); | |
155 | return -EINVAL; | |
156 | } | |
157 | return regoff[regno]; | |
158 | } | |
159 | ||
160 | /* | |
161 | * return the address being referenced be instruction | |
162 | * for rm=3 returning the content of the rm reg | |
163 | * for rm!=3 calculates the address using SIB and Disp | |
164 | */ | |
165 | static void __user *mpx_get_addr_ref(struct insn *insn, struct pt_regs *regs) | |
166 | { | |
167 | unsigned long addr, addr_offset; | |
168 | unsigned long base, base_offset; | |
169 | unsigned long indx, indx_offset; | |
170 | insn_byte_t sib; | |
171 | ||
172 | insn_get_modrm(insn); | |
173 | insn_get_sib(insn); | |
174 | sib = insn->sib.value; | |
175 | ||
176 | if (X86_MODRM_MOD(insn->modrm.value) == 3) { | |
177 | addr_offset = get_reg_offset(insn, regs, REG_TYPE_RM); | |
178 | if (addr_offset < 0) | |
179 | goto out_err; | |
180 | addr = regs_get_register(regs, addr_offset); | |
181 | } else { | |
182 | if (insn->sib.nbytes) { | |
183 | base_offset = get_reg_offset(insn, regs, REG_TYPE_BASE); | |
184 | if (base_offset < 0) | |
185 | goto out_err; | |
186 | ||
187 | indx_offset = get_reg_offset(insn, regs, REG_TYPE_INDEX); | |
188 | if (indx_offset < 0) | |
189 | goto out_err; | |
190 | ||
191 | base = regs_get_register(regs, base_offset); | |
192 | indx = regs_get_register(regs, indx_offset); | |
193 | addr = base + indx * (1 << X86_SIB_SCALE(sib)); | |
194 | } else { | |
195 | addr_offset = get_reg_offset(insn, regs, REG_TYPE_RM); | |
196 | if (addr_offset < 0) | |
197 | goto out_err; | |
198 | addr = regs_get_register(regs, addr_offset); | |
199 | } | |
200 | addr += insn->displacement.value; | |
201 | } | |
202 | return (void __user *)addr; | |
203 | out_err: | |
204 | return (void __user *)-1; | |
205 | } | |
206 | ||
207 | static int mpx_insn_decode(struct insn *insn, | |
208 | struct pt_regs *regs) | |
209 | { | |
210 | unsigned char buf[MAX_INSN_SIZE]; | |
211 | int x86_64 = !test_thread_flag(TIF_IA32); | |
212 | int not_copied; | |
213 | int nr_copied; | |
214 | ||
215 | not_copied = copy_from_user(buf, (void __user *)regs->ip, sizeof(buf)); | |
216 | nr_copied = sizeof(buf) - not_copied; | |
217 | /* | |
218 | * The decoder _should_ fail nicely if we pass it a short buffer. | |
219 | * But, let's not depend on that implementation detail. If we | |
220 | * did not get anything, just error out now. | |
221 | */ | |
222 | if (!nr_copied) | |
223 | return -EFAULT; | |
224 | insn_init(insn, buf, nr_copied, x86_64); | |
225 | insn_get_length(insn); | |
226 | /* | |
227 | * copy_from_user() tries to get as many bytes as we could see in | |
228 | * the largest possible instruction. If the instruction we are | |
229 | * after is shorter than that _and_ we attempt to copy from | |
230 | * something unreadable, we might get a short read. This is OK | |
231 | * as long as the read did not stop in the middle of the | |
232 | * instruction. Check to see if we got a partial instruction. | |
233 | */ | |
234 | if (nr_copied < insn->length) | |
235 | return -EFAULT; | |
236 | ||
237 | insn_get_opcode(insn); | |
238 | /* | |
239 | * We only _really_ need to decode bndcl/bndcn/bndcu | |
240 | * Error out on anything else. | |
241 | */ | |
242 | if (insn->opcode.bytes[0] != 0x0f) | |
243 | goto bad_opcode; | |
244 | if ((insn->opcode.bytes[1] != 0x1a) && | |
245 | (insn->opcode.bytes[1] != 0x1b)) | |
246 | goto bad_opcode; | |
247 | ||
248 | return 0; | |
249 | bad_opcode: | |
250 | return -EINVAL; | |
251 | } | |
252 | ||
253 | /* | |
254 | * If a bounds overflow occurs then a #BR is generated. This | |
255 | * function decodes MPX instructions to get violation address | |
256 | * and set this address into extended struct siginfo. | |
257 | * | |
258 | * Note that this is not a super precise way of doing this. | |
259 | * Userspace could have, by the time we get here, written | |
260 | * anything it wants in to the instructions. We can not | |
261 | * trust anything about it. They might not be valid | |
262 | * instructions or might encode invalid registers, etc... | |
263 | * | |
264 | * The caller is expected to kfree() the returned siginfo_t. | |
265 | */ | |
266 | siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, | |
267 | struct xsave_struct *xsave_buf) | |
268 | { | |
269 | struct insn insn; | |
270 | uint8_t bndregno; | |
271 | int err; | |
272 | siginfo_t *info; | |
273 | ||
274 | err = mpx_insn_decode(&insn, regs); | |
275 | if (err) | |
276 | goto err_out; | |
277 | ||
278 | /* | |
279 | * We know at this point that we are only dealing with | |
280 | * MPX instructions. | |
281 | */ | |
282 | insn_get_modrm(&insn); | |
283 | bndregno = X86_MODRM_REG(insn.modrm.value); | |
284 | if (bndregno > 3) { | |
285 | err = -EINVAL; | |
286 | goto err_out; | |
287 | } | |
288 | info = kzalloc(sizeof(*info), GFP_KERNEL); | |
289 | if (!info) { | |
290 | err = -ENOMEM; | |
291 | goto err_out; | |
292 | } | |
293 | /* | |
294 | * The registers are always 64-bit, but the upper 32 | |
295 | * bits are ignored in 32-bit mode. Also, note that the | |
296 | * upper bounds are architecturally represented in 1's | |
297 | * complement form. | |
298 | * | |
299 | * The 'unsigned long' cast is because the compiler | |
300 | * complains when casting from integers to different-size | |
301 | * pointers. | |
302 | */ | |
303 | info->si_lower = (void __user *)(unsigned long) | |
304 | (xsave_buf->bndreg[bndregno].lower_bound); | |
305 | info->si_upper = (void __user *)(unsigned long) | |
306 | (~xsave_buf->bndreg[bndregno].upper_bound); | |
307 | info->si_addr_lsb = 0; | |
308 | info->si_signo = SIGSEGV; | |
309 | info->si_errno = 0; | |
310 | info->si_code = SEGV_BNDERR; | |
311 | info->si_addr = mpx_get_addr_ref(&insn, regs); | |
312 | /* | |
313 | * We were not able to extract an address from the instruction, | |
314 | * probably because there was something invalid in it. | |
315 | */ | |
316 | if (info->si_addr == (void *)-1) { | |
317 | err = -EINVAL; | |
318 | goto err_out; | |
319 | } | |
320 | return info; | |
321 | err_out: | |
322 | return ERR_PTR(err); | |
323 | } |