x86, mpx: Decode MPX instruction to get bound violation information
[linux-2.6-block.git] / arch / x86 / mm / mpx.c
CommitLineData
57319d80
QR
1/*
2 * mpx.c - Memory Protection eXtensions
3 *
4 * Copyright (c) 2014, Intel Corporation.
5 * Qiaowei Ren <qiaowei.ren@intel.com>
6 * Dave Hansen <dave.hansen@intel.com>
7 */
8#include <linux/kernel.h>
fcc7ffd6 9#include <linux/slab.h>
57319d80
QR
10#include <linux/syscalls.h>
11#include <linux/sched/sysctl.h>
12
13#include <asm/mman.h>
14#include <asm/mpx.h>
15
/*
 * ->name callback wired into mpx_vma_ops.  It always reports the fixed
 * label "[mpx]"; the vma argument is not consulted.
 */
static const char *mpx_mapping_name(struct vm_area_struct *vma)
{
	static const char mpx_label[] = "[mpx]";

	return mpx_label;
}
20
21static struct vm_operations_struct mpx_vma_ops = {
22 .name = mpx_mapping_name,
23};
24
25/*
26 * This is really a simplified "vm_mmap". it only handles MPX
27 * bounds tables (the bounds directory is user-allocated).
28 *
29 * Later on, we use the vma->vm_ops to uniquely identify these
30 * VMAs.
31 */
32static unsigned long mpx_mmap(unsigned long len)
33{
34 unsigned long ret;
35 unsigned long addr, pgoff;
36 struct mm_struct *mm = current->mm;
37 vm_flags_t vm_flags;
38 struct vm_area_struct *vma;
39
40 /* Only bounds table and bounds directory can be allocated here */
41 if (len != MPX_BD_SIZE_BYTES && len != MPX_BT_SIZE_BYTES)
42 return -EINVAL;
43
44 down_write(&mm->mmap_sem);
45
46 /* Too many mappings? */
47 if (mm->map_count > sysctl_max_map_count) {
48 ret = -ENOMEM;
49 goto out;
50 }
51
52 /* Obtain the address to map to. we verify (or select) it and ensure
53 * that it represents a valid section of the address space.
54 */
55 addr = get_unmapped_area(NULL, 0, len, 0, MAP_ANONYMOUS | MAP_PRIVATE);
56 if (addr & ~PAGE_MASK) {
57 ret = addr;
58 goto out;
59 }
60
61 vm_flags = VM_READ | VM_WRITE | VM_MPX |
62 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
63
64 /* Set pgoff according to addr for anon_vma */
65 pgoff = addr >> PAGE_SHIFT;
66
67 ret = mmap_region(NULL, addr, len, vm_flags, pgoff);
68 if (IS_ERR_VALUE(ret))
69 goto out;
70
71 vma = find_vma(mm, ret);
72 if (!vma) {
73 ret = -ENOMEM;
74 goto out;
75 }
76 vma->vm_ops = &mpx_vma_ops;
77
78 if (vm_flags & VM_LOCKED) {
79 up_write(&mm->mmap_sem);
80 mm_populate(ret, len);
81 return ret;
82 }
83
84out:
85 up_write(&mm->mmap_sem);
86 return ret;
87}
fcc7ffd6
DH
88
/* Selects which encoded register field get_reg_offset() decodes. */
enum reg_type {
	REG_TYPE_RM,		/* r/m field of the ModRM byte */
	REG_TYPE_INDEX,		/* index field of the SIB byte */
	REG_TYPE_BASE,		/* base field of the SIB byte */
};
94
95static unsigned long get_reg_offset(struct insn *insn, struct pt_regs *regs,
96 enum reg_type type)
97{
98 int regno = 0;
99
100 static const int regoff[] = {
101 offsetof(struct pt_regs, ax),
102 offsetof(struct pt_regs, cx),
103 offsetof(struct pt_regs, dx),
104 offsetof(struct pt_regs, bx),
105 offsetof(struct pt_regs, sp),
106 offsetof(struct pt_regs, bp),
107 offsetof(struct pt_regs, si),
108 offsetof(struct pt_regs, di),
109#ifdef CONFIG_X86_64
110 offsetof(struct pt_regs, r8),
111 offsetof(struct pt_regs, r9),
112 offsetof(struct pt_regs, r10),
113 offsetof(struct pt_regs, r11),
114 offsetof(struct pt_regs, r12),
115 offsetof(struct pt_regs, r13),
116 offsetof(struct pt_regs, r14),
117 offsetof(struct pt_regs, r15),
118#endif
119 };
120 int nr_registers = ARRAY_SIZE(regoff);
121 /*
122 * Don't possibly decode a 32-bit instructions as
123 * reading a 64-bit-only register.
124 */
125 if (IS_ENABLED(CONFIG_X86_64) && !insn->x86_64)
126 nr_registers -= 8;
127
128 switch (type) {
129 case REG_TYPE_RM:
130 regno = X86_MODRM_RM(insn->modrm.value);
131 if (X86_REX_B(insn->rex_prefix.value) == 1)
132 regno += 8;
133 break;
134
135 case REG_TYPE_INDEX:
136 regno = X86_SIB_INDEX(insn->sib.value);
137 if (X86_REX_X(insn->rex_prefix.value) == 1)
138 regno += 8;
139 break;
140
141 case REG_TYPE_BASE:
142 regno = X86_SIB_BASE(insn->sib.value);
143 if (X86_REX_B(insn->rex_prefix.value) == 1)
144 regno += 8;
145 break;
146
147 default:
148 pr_err("invalid register type");
149 BUG();
150 break;
151 }
152
153 if (regno > nr_registers) {
154 WARN_ONCE(1, "decoded an instruction with an invalid register");
155 return -EINVAL;
156 }
157 return regoff[regno];
158}
159
160/*
161 * return the address being referenced be instruction
162 * for rm=3 returning the content of the rm reg
163 * for rm!=3 calculates the address using SIB and Disp
164 */
165static void __user *mpx_get_addr_ref(struct insn *insn, struct pt_regs *regs)
166{
167 unsigned long addr, addr_offset;
168 unsigned long base, base_offset;
169 unsigned long indx, indx_offset;
170 insn_byte_t sib;
171
172 insn_get_modrm(insn);
173 insn_get_sib(insn);
174 sib = insn->sib.value;
175
176 if (X86_MODRM_MOD(insn->modrm.value) == 3) {
177 addr_offset = get_reg_offset(insn, regs, REG_TYPE_RM);
178 if (addr_offset < 0)
179 goto out_err;
180 addr = regs_get_register(regs, addr_offset);
181 } else {
182 if (insn->sib.nbytes) {
183 base_offset = get_reg_offset(insn, regs, REG_TYPE_BASE);
184 if (base_offset < 0)
185 goto out_err;
186
187 indx_offset = get_reg_offset(insn, regs, REG_TYPE_INDEX);
188 if (indx_offset < 0)
189 goto out_err;
190
191 base = regs_get_register(regs, base_offset);
192 indx = regs_get_register(regs, indx_offset);
193 addr = base + indx * (1 << X86_SIB_SCALE(sib));
194 } else {
195 addr_offset = get_reg_offset(insn, regs, REG_TYPE_RM);
196 if (addr_offset < 0)
197 goto out_err;
198 addr = regs_get_register(regs, addr_offset);
199 }
200 addr += insn->displacement.value;
201 }
202 return (void __user *)addr;
203out_err:
204 return (void __user *)-1;
205}
206
207static int mpx_insn_decode(struct insn *insn,
208 struct pt_regs *regs)
209{
210 unsigned char buf[MAX_INSN_SIZE];
211 int x86_64 = !test_thread_flag(TIF_IA32);
212 int not_copied;
213 int nr_copied;
214
215 not_copied = copy_from_user(buf, (void __user *)regs->ip, sizeof(buf));
216 nr_copied = sizeof(buf) - not_copied;
217 /*
218 * The decoder _should_ fail nicely if we pass it a short buffer.
219 * But, let's not depend on that implementation detail. If we
220 * did not get anything, just error out now.
221 */
222 if (!nr_copied)
223 return -EFAULT;
224 insn_init(insn, buf, nr_copied, x86_64);
225 insn_get_length(insn);
226 /*
227 * copy_from_user() tries to get as many bytes as we could see in
228 * the largest possible instruction. If the instruction we are
229 * after is shorter than that _and_ we attempt to copy from
230 * something unreadable, we might get a short read. This is OK
231 * as long as the read did not stop in the middle of the
232 * instruction. Check to see if we got a partial instruction.
233 */
234 if (nr_copied < insn->length)
235 return -EFAULT;
236
237 insn_get_opcode(insn);
238 /*
239 * We only _really_ need to decode bndcl/bndcn/bndcu
240 * Error out on anything else.
241 */
242 if (insn->opcode.bytes[0] != 0x0f)
243 goto bad_opcode;
244 if ((insn->opcode.bytes[1] != 0x1a) &&
245 (insn->opcode.bytes[1] != 0x1b))
246 goto bad_opcode;
247
248 return 0;
249bad_opcode:
250 return -EINVAL;
251}
252
253/*
254 * If a bounds overflow occurs then a #BR is generated. This
255 * function decodes MPX instructions to get violation address
256 * and set this address into extended struct siginfo.
257 *
258 * Note that this is not a super precise way of doing this.
259 * Userspace could have, by the time we get here, written
260 * anything it wants in to the instructions. We can not
261 * trust anything about it. They might not be valid
262 * instructions or might encode invalid registers, etc...
263 *
264 * The caller is expected to kfree() the returned siginfo_t.
265 */
266siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
267 struct xsave_struct *xsave_buf)
268{
269 struct insn insn;
270 uint8_t bndregno;
271 int err;
272 siginfo_t *info;
273
274 err = mpx_insn_decode(&insn, regs);
275 if (err)
276 goto err_out;
277
278 /*
279 * We know at this point that we are only dealing with
280 * MPX instructions.
281 */
282 insn_get_modrm(&insn);
283 bndregno = X86_MODRM_REG(insn.modrm.value);
284 if (bndregno > 3) {
285 err = -EINVAL;
286 goto err_out;
287 }
288 info = kzalloc(sizeof(*info), GFP_KERNEL);
289 if (!info) {
290 err = -ENOMEM;
291 goto err_out;
292 }
293 /*
294 * The registers are always 64-bit, but the upper 32
295 * bits are ignored in 32-bit mode. Also, note that the
296 * upper bounds are architecturally represented in 1's
297 * complement form.
298 *
299 * The 'unsigned long' cast is because the compiler
300 * complains when casting from integers to different-size
301 * pointers.
302 */
303 info->si_lower = (void __user *)(unsigned long)
304 (xsave_buf->bndreg[bndregno].lower_bound);
305 info->si_upper = (void __user *)(unsigned long)
306 (~xsave_buf->bndreg[bndregno].upper_bound);
307 info->si_addr_lsb = 0;
308 info->si_signo = SIGSEGV;
309 info->si_errno = 0;
310 info->si_code = SEGV_BNDERR;
311 info->si_addr = mpx_get_addr_ref(&insn, regs);
312 /*
313 * We were not able to extract an address from the instruction,
314 * probably because there was something invalid in it.
315 */
316 if (info->si_addr == (void *)-1) {
317 err = -EINVAL;
318 goto err_out;
319 }
320 return info;
321err_out:
322 return ERR_PTR(err);
323}