2 * mpx.c - Memory Protection eXtensions
4 * Copyright (c) 2014, Intel Corporation.
5 * Qiaowei Ren <qiaowei.ren@intel.com>
6 * Dave Hansen <dave.hansen@intel.com>
8 #include <linux/kernel.h>
9 #include <linux/slab.h>
10 #include <linux/syscalls.h>
11 #include <linux/sched/sysctl.h>
/*
 * mpx_mapping_name - naming callback for MPX mappings.
 *
 * Takes the vma being named and returns a const char pointer.  It is
 * installed as the .name member of mpx_vma_ops.
 *
 * NOTE(review): the body is not visible in this excerpt, so the exact
 * string returned is not documented here -- confirm in the full source.
 */
16 static const char *mpx_mapping_name(struct vm_area_struct *vma)
/*
 * VMA operations table for MPX mappings.  mpx_mmap() stores a pointer
 * to this table in vma->vm_ops of every mapping it creates.  Only the
 * .name hook is visible in this excerpt.
 */
21 static struct vm_operations_struct mpx_vma_ops = {
22 .name = mpx_mapping_name,
26 * This is really a simplified "vm_mmap". It only handles MPX
27 * bounds tables (the bounds directory is user-allocated).
29 * Later on, we use the vma->vm_ops to uniquely identify these
/*
 * mpx_mmap - create a file-less mapping of @len bytes in current->mm
 * for MPX use.
 *
 * @len must equal MPX_BD_SIZE_BYTES or MPX_BT_SIZE_BYTES.  The address
 * is chosen with get_unmapped_area() using MAP_ANONYMOUS | MAP_PRIVATE,
 * and the mapping is created with mmap_region() while mm->mmap_sem is
 * held for write.  The new vma is flagged VM_MPX and gets mpx_vma_ops
 * as its vm_ops.
 *
 * NOTE(review): the error paths and return statements are not visible
 * in this excerpt.  Presumably the mapped address or an error value is
 * returned -- confirm against the full source.
 */
32 static unsigned long mpx_mmap(unsigned long len)
35 unsigned long addr, pgoff;
36 struct mm_struct *mm = current->mm;
38 struct vm_area_struct *vma;
40 /* Only bounds table and bounds directory can be allocated here */
41 if (len != MPX_BD_SIZE_BYTES && len != MPX_BT_SIZE_BYTES)
44 down_write(&mm->mmap_sem);
46 /* Too many mappings? */
47 if (mm->map_count > sysctl_max_map_count) {
52 /* Obtain the address to map to. we verify (or select) it and ensure
53 * that it represents a valid section of the address space.
55 addr = get_unmapped_area(NULL, 0, len, 0, MAP_ANONYMOUS | MAP_PRIVATE);
56 if (addr & ~PAGE_MASK) {
61 vm_flags = VM_READ | VM_WRITE | VM_MPX |
62 mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
64 /* Set pgoff according to addr for anon_vma */
65 pgoff = addr >> PAGE_SHIFT;
67 ret = mmap_region(NULL, addr, len, vm_flags, pgoff);
68 if (IS_ERR_VALUE(ret))
/*
 * Look the freshly created mapping up again so that its vm_ops can be
 * pointed at mpx_vma_ops.
 */
71 vma = find_vma(mm, ret);
76 vma->vm_ops = &mpx_vma_ops;
/*
 * VM_LOCKED is not among the flags set explicitly above, so it can only
 * arrive through mm->def_flags.  In that case mmap_sem is released
 * before mm_populate() is called on the new range.
 */
78 if (vm_flags & VM_LOCKED) {
79 up_write(&mm->mmap_sem);
80 mm_populate(ret, len);
85 up_write(&mm->mmap_sem);
/*
 * get_reg_offset - translate a register number encoded in @insn into a
 * byte offset within struct pt_regs.
 *
 * The register number is read from the ModRM r/m field, the SIB index
 * field or the SIB base field, and the matching REX.B or REX.X bit is
 * checked in each case.  Callers in this file select the field by
 * passing REG_TYPE_RM, REG_TYPE_INDEX or REG_TYPE_BASE.
 *
 * The register number is used as an index into regoff[], so it must be
 * strictly below nr_registers.
 *
 * NOTE(review): the statements guarded by the if-conditions below and
 * the error return values are not visible in this excerpt.
 */
95 static unsigned long get_reg_offset(struct insn *insn, struct pt_regs *regs,
100 static const int regoff[] = {
101 offsetof(struct pt_regs, ax),
102 offsetof(struct pt_regs, cx),
103 offsetof(struct pt_regs, dx),
104 offsetof(struct pt_regs, bx),
105 offsetof(struct pt_regs, sp),
106 offsetof(struct pt_regs, bp),
107 offsetof(struct pt_regs, si),
108 offsetof(struct pt_regs, di),
110 offsetof(struct pt_regs, r8),
111 offsetof(struct pt_regs, r9),
112 offsetof(struct pt_regs, r10),
113 offsetof(struct pt_regs, r11),
114 offsetof(struct pt_regs, r12),
115 offsetof(struct pt_regs, r13),
116 offsetof(struct pt_regs, r14),
117 offsetof(struct pt_regs, r15),
120 int nr_registers = ARRAY_SIZE(regoff);
122 * Don't possibly decode a 32-bit instruction as
123 * reading a 64-bit-only register.
125 if (IS_ENABLED(CONFIG_X86_64) && !insn->x86_64)
130 regno = X86_MODRM_RM(insn->modrm.value);
131 if (X86_REX_B(insn->rex_prefix.value) == 1)
136 regno = X86_SIB_INDEX(insn->sib.value);
137 if (X86_REX_X(insn->rex_prefix.value) == 1)
142 regno = X86_SIB_BASE(insn->sib.value);
143 if (X86_REX_B(insn->rex_prefix.value) == 1)
148 pr_err("invalid register type");
/*
 * Valid indices into regoff[] are 0 .. nr_registers - 1.  The test must
 * therefore reject regno == nr_registers as well.  A strict greater-than
 * comparison would let that value through and read one element past
 * the usable part of the table.
 */
153 if (regno >= nr_registers) {
154 WARN_ONCE(1, "decoded an instruction with an invalid register");
157 return regoff[regno];
161 * Return the address being referenced by the instruction:
162 * for mod == 3, return the content of the r/m register;
163 * for mod != 3, calculate the address using SIB and displacement.
/*
 * mpx_get_addr_ref - compute the user address operand of @insn from the
 * saved register values in @regs.
 *
 * NOTE(review): the checks between the get_reg_offset() calls and the
 * register reads are not visible in this excerpt.
 */
165 static void __user *mpx_get_addr_ref(struct insn *insn, struct pt_regs *regs)
167 unsigned long addr, addr_offset;
168 unsigned long base, base_offset;
169 unsigned long indx, indx_offset;
172 insn_get_modrm(insn);
174 sib = insn->sib.value;
/* ModRM mod field == 3: the operand is the register named by r/m. */
176 if (X86_MODRM_MOD(insn->modrm.value) == 3) {
177 addr_offset = get_reg_offset(insn, regs, REG_TYPE_RM);
180 addr = regs_get_register(regs, addr_offset);
/* A SIB byte is present: address = base + index * (1 << scale). */
182 if (insn->sib.nbytes) {
183 base_offset = get_reg_offset(insn, regs, REG_TYPE_BASE);
187 indx_offset = get_reg_offset(insn, regs, REG_TYPE_INDEX);
191 base = regs_get_register(regs, base_offset);
192 indx = regs_get_register(regs, indx_offset);
193 addr = base + indx * (1 << X86_SIB_SCALE(sib));
/* No SIB byte: the address comes from the register named by r/m. */
195 addr_offset = get_reg_offset(insn, regs, REG_TYPE_RM);
198 addr = regs_get_register(regs, addr_offset);
/* Add the displacement value decoded from the instruction. */
200 addr += insn->displacement.value;
202 return (void __user *)addr;
/*
 * A value of -1 cast to a user pointer marks failure.
 * mpx_generate_siginfo() compares si_addr against this value.
 */
204 return (void __user *)-1;
/*
 * mpx_insn_decode - fetch and decode the user instruction at regs->ip.
 *
 * Copies up to MAX_INSN_SIZE bytes from user space into a local buffer,
 * initialises @insn on that buffer, and decodes its length and opcode.
 * Only the two-byte opcodes 0x0f 0x1a and 0x0f 0x1b pass the opcode
 * checks below.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 * mpx_generate_siginfo() stores the result in a variable named err, so
 * presumably zero means success -- confirm against the full source.
 */
207 static int mpx_insn_decode(struct insn *insn,
208 struct pt_regs *regs)
210 unsigned char buf[MAX_INSN_SIZE];
/* Decode in 64-bit mode unless the thread has TIF_IA32 set. */
211 int x86_64 = !test_thread_flag(TIF_IA32);
/* The copy_from_user() result is treated as the count of bytes NOT copied. */
215 not_copied = copy_from_user(buf, (void __user *)regs->ip, sizeof(buf));
216 nr_copied = sizeof(buf) - not_copied;
218 * The decoder _should_ fail nicely if we pass it a short buffer.
219 * But, let's not depend on that implementation detail. If we
220 * did not get anything, just error out now.
224 insn_init(insn, buf, nr_copied, x86_64);
225 insn_get_length(insn);
227 * copy_from_user() tries to get as many bytes as we could see in
228 * the largest possible instruction. If the instruction we are
229 * after is shorter than that _and_ we attempt to copy from
230 * something unreadable, we might get a short read. This is OK
231 * as long as the read did not stop in the middle of the
232 * instruction. Check to see if we got a partial instruction.
234 if (nr_copied < insn->length)
237 insn_get_opcode(insn);
239 * We only _really_ need to decode bndcl/bndcn/bndcu
240 * Error out on anything else.
242 if (insn->opcode.bytes[0] != 0x0f)
244 if ((insn->opcode.bytes[1] != 0x1a) &&
245 (insn->opcode.bytes[1] != 0x1b))
254 * If a bounds overflow occurs then a #BR is generated. This
255 * function decodes MPX instructions to get violation address
256 * and set this address into extended struct siginfo.
258 * Note that this is not a super precise way of doing this.
259 * Userspace could have, by the time we get here, written
260 * anything it wants into the instructions. We cannot
261 * trust anything about it. They might not be valid
262 * instructions or might encode invalid registers, etc...
264 * The caller is expected to kfree() the returned siginfo_t.
266 siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
267 struct xsave_struct *xsave_buf)
274 err = mpx_insn_decode(&insn, regs);
279 * We know at this point that we are only dealing with
282 insn_get_modrm(&insn);
283 bndregno = X86_MODRM_REG(insn.modrm.value);
288 info = kzalloc(sizeof(*info), GFP_KERNEL);
294 * The registers are always 64-bit, but the upper 32
295 * bits are ignored in 32-bit mode. Also, note that the
296 * upper bounds are architecturally represented in 1's
299 * The 'unsigned long' cast is because the compiler
300 * complains when casting from integers to different-size
303 info->si_lower = (void __user *)(unsigned long)
304 (xsave_buf->bndreg[bndregno].lower_bound);
305 info->si_upper = (void __user *)(unsigned long)
306 (~xsave_buf->bndreg[bndregno].upper_bound);
307 info->si_addr_lsb = 0;
308 info->si_signo = SIGSEGV;
310 info->si_code = SEGV_BNDERR;
311 info->si_addr = mpx_get_addr_ref(&insn, regs);
313 * We were not able to extract an address from the instruction,
314 * probably because there was something invalid in it.
316 if (info->si_addr == (void *)-1) {