1 /******************************************************************************
4 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
6 * Copyright (c) 2005 Keir Fraser
8 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
9 * privileged instructions:
11 * Copyright (C) 2006 Qumranet
12 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
14 * Avi Kivity <avi@qumranet.com>
15 * Yaniv Kamay <yaniv@qumranet.com>
17 * This work is licensed under the terms of the GNU GPL, version 2. See
18 * the COPYING file in the top-level directory.
20 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
23 #include <linux/kvm_host.h>
24 #include "kvm_cache_regs.h"
25 #include <asm/kvm_emulate.h>
26 #include <linux/stringify.h>
27 #include <asm/debugreg.h>
36 #define OpImplicit 1ull /* No generic decode */
37 #define OpReg 2ull /* Register */
38 #define OpMem 3ull /* Memory */
39 #define OpAcc 4ull /* Accumulator: AL/AX/EAX/RAX */
40 #define OpDI 5ull /* ES:DI/EDI/RDI */
41 #define OpMem64 6ull /* Memory, 64-bit */
42 #define OpImmUByte 7ull /* Zero-extended 8-bit immediate */
43 #define OpDX 8ull /* DX register */
44 #define OpCL 9ull /* CL register (for shifts) */
45 #define OpImmByte 10ull /* 8-bit sign extended immediate */
46 #define OpOne 11ull /* Implied 1 */
47 #define OpImm 12ull /* Sign extended up to 32-bit immediate */
48 #define OpMem16 13ull /* Memory operand (16-bit). */
49 #define OpMem32 14ull /* Memory operand (32-bit). */
50 #define OpImmU 15ull /* Immediate operand, zero extended */
51 #define OpSI 16ull /* SI/ESI/RSI */
52 #define OpImmFAddr 17ull /* Immediate far address */
53 #define OpMemFAddr 18ull /* Far address in memory */
54 #define OpImmU16 19ull /* Immediate operand, 16 bits, zero extended */
55 #define OpES 20ull /* ES */
56 #define OpCS 21ull /* CS */
57 #define OpSS 22ull /* SS */
58 #define OpDS 23ull /* DS */
59 #define OpFS 24ull /* FS */
60 #define OpGS 25ull /* GS */
61 #define OpMem8 26ull /* 8-bit zero extended memory operand */
62 #define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */
63 #define OpXLat 28ull /* memory at BX/EBX/RBX + zero-extended AL */
64 #define OpAccLo 29ull /* Low part of extended acc (AX/AX/EAX/RAX) */
65 #define OpAccHi 30ull /* High part of extended acc (-/DX/EDX/RDX) */
67 #define OpBits 5 /* Width of operand field */
68 #define OpMask ((1ull << OpBits) - 1)
71 * Opcode effective-address decode tables.
72 * Note that we only emulate instructions that have at least one memory
73 * operand (excluding implicit stack references). We assume that stack
74 * references and instruction fetches will never occur in special memory
75 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
79 /* Operand sizes: 8-bit operands or specified/overridden size. */
80 #define ByteOp (1<<0) /* 8-bit operands. */
81 /* Destination operand type. */
83 #define ImplicitOps (OpImplicit << DstShift)
84 #define DstReg (OpReg << DstShift)
85 #define DstMem (OpMem << DstShift)
86 #define DstAcc (OpAcc << DstShift)
87 #define DstDI (OpDI << DstShift)
88 #define DstMem64 (OpMem64 << DstShift)
89 #define DstMem16 (OpMem16 << DstShift)
90 #define DstImmUByte (OpImmUByte << DstShift)
91 #define DstDX (OpDX << DstShift)
92 #define DstAccLo (OpAccLo << DstShift)
93 #define DstMask (OpMask << DstShift)
94 /* Source operand type. */
96 #define SrcNone (OpNone << SrcShift)
97 #define SrcReg (OpReg << SrcShift)
98 #define SrcMem (OpMem << SrcShift)
99 #define SrcMem16 (OpMem16 << SrcShift)
100 #define SrcMem32 (OpMem32 << SrcShift)
101 #define SrcImm (OpImm << SrcShift)
102 #define SrcImmByte (OpImmByte << SrcShift)
103 #define SrcOne (OpOne << SrcShift)
104 #define SrcImmUByte (OpImmUByte << SrcShift)
105 #define SrcImmU (OpImmU << SrcShift)
106 #define SrcSI (OpSI << SrcShift)
107 #define SrcXLat (OpXLat << SrcShift)
108 #define SrcImmFAddr (OpImmFAddr << SrcShift)
109 #define SrcMemFAddr (OpMemFAddr << SrcShift)
110 #define SrcAcc (OpAcc << SrcShift)
111 #define SrcImmU16 (OpImmU16 << SrcShift)
112 #define SrcImm64 (OpImm64 << SrcShift)
113 #define SrcDX (OpDX << SrcShift)
114 #define SrcMem8 (OpMem8 << SrcShift)
115 #define SrcAccHi (OpAccHi << SrcShift)
116 #define SrcMask (OpMask << SrcShift)
117 #define BitOp (1<<11)
118 #define MemAbs (1<<12) /* Memory operand is absolute displacement */
119 #define String (1<<13) /* String instruction (rep capable) */
120 #define Stack (1<<14) /* Stack instruction (push/pop) */
121 #define GroupMask (7<<15) /* Opcode uses one of the group mechanisms */
122 #define Group (1<<15) /* Bits 3:5 of modrm byte extend opcode */
123 #define GroupDual (2<<15) /* Alternate decoding of mod == 3 */
124 #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */
125 #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */
126 #define Escape (5<<15) /* Escape to coprocessor instruction */
127 #define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */
128 #define ModeDual (7<<15) /* Different instruction for 32/64 bit */
129 #define Sse (1<<18) /* SSE Vector instruction */
130 /* Generic ModRM decode. */
131 #define ModRM (1<<19)
132 /* Destination is only written; never read. */
135 #define Prot (1<<21) /* instruction generates #UD if not in prot-mode */
136 #define EmulateOnUD (1<<22) /* Emulate if unsupported by the host */
137 #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
138 #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */
139 #define Undefined (1<<25) /* No Such Instruction */
140 #define Lock (1<<26) /* lock prefix is allowed for the instruction */
141 #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
143 #define PageTable (1 << 29) /* instruction used to write page table */
144 #define NotImpl (1 << 30) /* instruction is not implemented */
145 /* Source 2 operand type */
146 #define Src2Shift (31)
147 #define Src2None (OpNone << Src2Shift)
148 #define Src2Mem (OpMem << Src2Shift)
149 #define Src2CL (OpCL << Src2Shift)
150 #define Src2ImmByte (OpImmByte << Src2Shift)
151 #define Src2One (OpOne << Src2Shift)
152 #define Src2Imm (OpImm << Src2Shift)
153 #define Src2ES (OpES << Src2Shift)
154 #define Src2CS (OpCS << Src2Shift)
155 #define Src2SS (OpSS << Src2Shift)
156 #define Src2DS (OpDS << Src2Shift)
157 #define Src2FS (OpFS << Src2Shift)
158 #define Src2GS (OpGS << Src2Shift)
159 #define Src2Mask (OpMask << Src2Shift)
160 #define Mmx ((u64)1 << 40) /* MMX Vector instruction */
161 #define AlignMask ((u64)7 << 41)
162 #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
163 #define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */
164 #define Avx ((u64)3 << 41) /* Advanced Vector Extensions */
165 #define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */
166 #define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
167 #define NoWrite ((u64)1 << 45) /* No writeback */
168 #define SrcWrite ((u64)1 << 46) /* Write back src operand */
169 #define NoMod ((u64)1 << 47) /* Mod field is ignored */
170 #define Intercept ((u64)1 << 48) /* Has valid intercept field */
171 #define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */
172 #define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */
173 #define NearBranch ((u64)1 << 52) /* Near branches */
174 #define No16 ((u64)1 << 53) /* No 16 bit operand */
175 #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */
177 #define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
179 #define X2(x...) x, x
180 #define X3(x...) X2(x), x
181 #define X4(x...) X2(x), X2(x)
182 #define X5(x...) X4(x), x
183 #define X6(x...) X4(x), X2(x)
184 #define X7(x...) X4(x), X3(x)
185 #define X8(x...) X4(x), X4(x)
186 #define X16(x...) X8(x), X8(x)
188 #define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
189 #define FASTOP_SIZE 8
192 * fastop functions have a special calling convention:
197 * flags: rflags (in/out)
198 * ex: rsi (in:fastop pointer, out:zero if exception)
200 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
201 * different operand sizes can be reached by calculation, rather than a jump
202 * table (which would be bigger than the code).
204 * fastop functions are declared as taking a never-defined fastop parameter,
205 * so they can't be called from C directly.
214 int (*execute)(struct x86_emulate_ctxt *ctxt);
215 const struct opcode *group;
216 const struct group_dual *gdual;
217 const struct gprefix *gprefix;
218 const struct escape *esc;
219 const struct instr_dual *idual;
220 const struct mode_dual *mdual;
221 void (*fastop)(struct fastop *fake);
223 int (*check_perm)(struct x86_emulate_ctxt *ctxt);
227 struct opcode mod012[8];
228 struct opcode mod3[8];
232 struct opcode pfx_no;
233 struct opcode pfx_66;
234 struct opcode pfx_f2;
235 struct opcode pfx_f3;
240 struct opcode high[64];
244 struct opcode mod012;
249 struct opcode mode32;
250 struct opcode mode64;
253 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
255 enum x86_transfer_type {
257 X86_TRANSFER_CALL_JMP,
259 X86_TRANSFER_TASK_SWITCH,
262 static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
264 if (!(ctxt->regs_valid & (1 << nr))) {
265 ctxt->regs_valid |= 1 << nr;
266 ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
268 return ctxt->_regs[nr];
271 static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
273 ctxt->regs_valid |= 1 << nr;
274 ctxt->regs_dirty |= 1 << nr;
275 return &ctxt->_regs[nr];
278 static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
281 return reg_write(ctxt, nr);
284 static void writeback_registers(struct x86_emulate_ctxt *ctxt)
288 for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
289 ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
292 static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
294 ctxt->regs_dirty = 0;
295 ctxt->regs_valid = 0;
299 * These EFLAGS bits are restored from saved value during emulation, and
300 * any changes are written back to the saved value after emulation.
302 #define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
303 X86_EFLAGS_PF|X86_EFLAGS_CF)
311 static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
313 #define FOP_FUNC(name) \
314 ".align " __stringify(FASTOP_SIZE) " \n\t" \
315 ".type " name ", @function \n\t" \
318 #define FOP_RET "ret \n\t"
320 #define FOP_START(op) \
321 extern void em_##op(struct fastop *fake); \
322 asm(".pushsection .text, \"ax\" \n\t" \
323 ".global em_" #op " \n\t" \
330 FOP_FUNC(__stringify(__UNIQUE_ID(nop))) \
333 #define FOP1E(op, dst) \
334 FOP_FUNC(#op "_" #dst) \
335 "10: " #op " %" #dst " \n\t" FOP_RET
337 #define FOP1EEX(op, dst) \
338 FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)
340 #define FASTOP1(op) \
345 ON64(FOP1E(op##q, rax)) \
348 /* 1-operand, using src2 (for MUL/DIV r/m) */
349 #define FASTOP1SRC2(op, name) \
354 ON64(FOP1E(op, rcx)) \
357 /* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
358 #define FASTOP1SRC2EX(op, name) \
363 ON64(FOP1EEX(op, rcx)) \
366 #define FOP2E(op, dst, src) \
367 FOP_FUNC(#op "_" #dst "_" #src) \
368 #op " %" #src ", %" #dst " \n\t" FOP_RET
370 #define FASTOP2(op) \
372 FOP2E(op##b, al, dl) \
373 FOP2E(op##w, ax, dx) \
374 FOP2E(op##l, eax, edx) \
375 ON64(FOP2E(op##q, rax, rdx)) \
378 /* 2 operand, word only */
379 #define FASTOP2W(op) \
382 FOP2E(op##w, ax, dx) \
383 FOP2E(op##l, eax, edx) \
384 ON64(FOP2E(op##q, rax, rdx)) \
387 /* 2 operand, src is CL */
388 #define FASTOP2CL(op) \
390 FOP2E(op##b, al, cl) \
391 FOP2E(op##w, ax, cl) \
392 FOP2E(op##l, eax, cl) \
393 ON64(FOP2E(op##q, rax, cl)) \
396 /* 2 operand, src and dest are reversed */
397 #define FASTOP2R(op, name) \
399 FOP2E(op##b, dl, al) \
400 FOP2E(op##w, dx, ax) \
401 FOP2E(op##l, edx, eax) \
402 ON64(FOP2E(op##q, rdx, rax)) \
405 #define FOP3E(op, dst, src, src2) \
406 FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
407 #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET
409 /* 3-operand, word-only, src2=cl */
410 #define FASTOP3WCL(op) \
413 FOP3E(op##w, ax, dx, cl) \
414 FOP3E(op##l, eax, edx, cl) \
415 ON64(FOP3E(op##q, rax, rdx, cl)) \
418 /* Special case for SETcc - 1 instruction per cc */
419 #define FOP_SETCC(op) \
421 ".type " #op ", @function \n\t" \
426 asm(".global kvm_fastop_exception \n"
427 "kvm_fastop_exception: xor %esi, %esi; ret");
448 FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET
452 * XXX: inoutclob user must know where the argument is being expanded.
453 * Relying on CC_HAVE_ASM_GOTO would allow us to remove _fault.
455 #define asm_safe(insn, inoutclob...) \
459 asm volatile("1:" insn "\n" \
461 ".pushsection .fixup, \"ax\"\n" \
462 "3: movl $1, %[_fault]\n" \
465 _ASM_EXTABLE(1b, 3b) \
466 : [_fault] "+qm"(_fault) inoutclob ); \
468 _fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
471 static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
472 enum x86_intercept intercept,
473 enum x86_intercept_stage stage)
475 struct x86_instruction_info info = {
476 .intercept = intercept,
477 .rep_prefix = ctxt->rep_prefix,
478 .modrm_mod = ctxt->modrm_mod,
479 .modrm_reg = ctxt->modrm_reg,
480 .modrm_rm = ctxt->modrm_rm,
481 .src_val = ctxt->src.val64,
482 .dst_val = ctxt->dst.val64,
483 .src_bytes = ctxt->src.bytes,
484 .dst_bytes = ctxt->dst.bytes,
485 .ad_bytes = ctxt->ad_bytes,
486 .next_rip = ctxt->eip,
489 return ctxt->ops->intercept(ctxt, &info, stage);
492 static void assign_masked(ulong *dest, ulong src, ulong mask)
494 *dest = (*dest & ~mask) | (src & mask);
497 static void assign_register(unsigned long *reg, u64 val, int bytes)
499 /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
502 *(u8 *)reg = (u8)val;
505 *(u16 *)reg = (u16)val;
509 break; /* 64b: zero-extend */
516 static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
518 return (1UL << (ctxt->ad_bytes << 3)) - 1;
521 static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
524 struct desc_struct ss;
526 if (ctxt->mode == X86EMUL_MODE_PROT64)
528 ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
529 return ~0U >> ((ss.d ^ 1) * 16); /* d=0: 0xffff; d=1: 0xffffffff */
532 static int stack_size(struct x86_emulate_ctxt *ctxt)
534 return (__fls(stack_mask(ctxt)) + 1) >> 3;
537 /* Access/update address held in a register, based on addressing mode. */
538 static inline unsigned long
539 address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
541 if (ctxt->ad_bytes == sizeof(unsigned long))
544 return reg & ad_mask(ctxt);
547 static inline unsigned long
548 register_address(struct x86_emulate_ctxt *ctxt, int reg)
550 return address_mask(ctxt, reg_read(ctxt, reg));
553 static void masked_increment(ulong *reg, ulong mask, int inc)
555 assign_masked(reg, *reg + inc, mask);
559 register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
561 ulong *preg = reg_rmw(ctxt, reg);
563 assign_register(preg, *preg + inc, ctxt->ad_bytes);
566 static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
568 masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
571 static u32 desc_limit_scaled(struct desc_struct *desc)
573 u32 limit = get_desc_limit(desc);
575 return desc->g ? (limit << 12) | 0xfff : limit;
578 static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
580 if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
583 return ctxt->ops->get_cached_segment_base(ctxt, seg);
586 static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
587 u32 error, bool valid)
590 ctxt->exception.vector = vec;
591 ctxt->exception.error_code = error;
592 ctxt->exception.error_code_valid = valid;
593 return X86EMUL_PROPAGATE_FAULT;
596 static int emulate_db(struct x86_emulate_ctxt *ctxt)
598 return emulate_exception(ctxt, DB_VECTOR, 0, false);
601 static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
603 return emulate_exception(ctxt, GP_VECTOR, err, true);
606 static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
608 return emulate_exception(ctxt, SS_VECTOR, err, true);
611 static int emulate_ud(struct x86_emulate_ctxt *ctxt)
613 return emulate_exception(ctxt, UD_VECTOR, 0, false);
616 static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
618 return emulate_exception(ctxt, TS_VECTOR, err, true);
621 static int emulate_de(struct x86_emulate_ctxt *ctxt)
623 return emulate_exception(ctxt, DE_VECTOR, 0, false);
626 static int emulate_nm(struct x86_emulate_ctxt *ctxt)
628 return emulate_exception(ctxt, NM_VECTOR, 0, false);
631 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
634 struct desc_struct desc;
636 ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
640 static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
645 struct desc_struct desc;
647 ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
648 ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
652 * x86 defines three classes of vector instructions: explicitly
653 * aligned, explicitly unaligned, and the rest, which change behaviour
654 * depending on whether they're AVX encoded or not.
656 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
657 * subject to the same check. FXSAVE and FXRSTOR are checked here too as their
658 * 512 bytes of data must be aligned to a 16 byte boundary.
660 static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
662 u64 alignment = ctxt->d & AlignMask;
664 if (likely(size < 16))
679 static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
680 struct segmented_address addr,
681 unsigned *max_size, unsigned size,
682 bool write, bool fetch,
683 enum x86emul_mode mode, ulong *linear)
685 struct desc_struct desc;
691 la = seg_base(ctxt, addr.seg) + addr.ea;
694 case X86EMUL_MODE_PROT64:
696 if (is_noncanonical_address(la))
699 *max_size = min_t(u64, ~0u, (1ull << 48) - la);
700 if (size > *max_size)
704 *linear = la = (u32)la;
705 usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
709 /* code segment in protected mode or read-only data segment */
710 if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
711 || !(desc.type & 2)) && write)
713 /* unreadable code segment */
714 if (!fetch && (desc.type & 8) && !(desc.type & 2))
716 lim = desc_limit_scaled(&desc);
717 if (!(desc.type & 8) && (desc.type & 4)) {
718 /* expand-down segment */
721 lim = desc.d ? 0xffffffff : 0xffff;
725 if (lim == 0xffffffff)
728 *max_size = (u64)lim + 1 - addr.ea;
729 if (size > *max_size)
734 if (la & (insn_alignment(ctxt, size) - 1))
735 return emulate_gp(ctxt, 0);
736 return X86EMUL_CONTINUE;
738 if (addr.seg == VCPU_SREG_SS)
739 return emulate_ss(ctxt, 0);
741 return emulate_gp(ctxt, 0);
744 static int linearize(struct x86_emulate_ctxt *ctxt,
745 struct segmented_address addr,
746 unsigned size, bool write,
750 return __linearize(ctxt, addr, &max_size, size, write, false,
754 static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
755 enum x86emul_mode mode)
760 struct segmented_address addr = { .seg = VCPU_SREG_CS,
763 if (ctxt->op_bytes != sizeof(unsigned long))
764 addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
765 rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
766 if (rc == X86EMUL_CONTINUE)
767 ctxt->_eip = addr.ea;
771 static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
773 return assign_eip(ctxt, dst, ctxt->mode);
776 static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
777 const struct desc_struct *cs_desc)
779 enum x86emul_mode mode = ctxt->mode;
783 if (ctxt->mode >= X86EMUL_MODE_PROT16) {
787 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
789 mode = X86EMUL_MODE_PROT64;
791 mode = X86EMUL_MODE_PROT32; /* temporary value */
794 if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
795 mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
796 rc = assign_eip(ctxt, dst, mode);
797 if (rc == X86EMUL_CONTINUE)
802 static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
804 return assign_eip_near(ctxt, ctxt->_eip + rel);
807 static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
808 struct segmented_address addr,
815 rc = linearize(ctxt, addr, size, false, &linear);
816 if (rc != X86EMUL_CONTINUE)
818 return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
822 * Prefetch the remaining bytes of the instruction without crossing page
823 * boundary if they are not in fetch_cache yet.
825 static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
828 unsigned size, max_size;
829 unsigned long linear;
830 int cur_size = ctxt->fetch.end - ctxt->fetch.data;
831 struct segmented_address addr = { .seg = VCPU_SREG_CS,
832 .ea = ctxt->eip + cur_size };
835 * We do not know exactly how many bytes will be needed, and
836 * __linearize is expensive, so fetch as much as possible. We
837 * just have to avoid going beyond the 15 byte limit, the end
838 * of the segment, or the end of the page.
840 * __linearize is called with size 0 so that it does not do any
841 * boundary check itself. Instead, we use max_size to check
844 rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
846 if (unlikely(rc != X86EMUL_CONTINUE))
849 size = min_t(unsigned, 15UL ^ cur_size, max_size);
850 size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));
853 * One instruction can only straddle two pages,
854 * and one has been loaded at the beginning of
855 * x86_decode_insn. So, if not enough bytes
856 * still, we must have hit the 15-byte boundary.
858 if (unlikely(size < op_size))
859 return emulate_gp(ctxt, 0);
861 rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
862 size, &ctxt->exception);
863 if (unlikely(rc != X86EMUL_CONTINUE))
865 ctxt->fetch.end += size;
866 return X86EMUL_CONTINUE;
869 static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
872 unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
874 if (unlikely(done_size < size))
875 return __do_insn_fetch_bytes(ctxt, size - done_size);
877 return X86EMUL_CONTINUE;
880 /* Fetch next part of the instruction being emulated. */
881 #define insn_fetch(_type, _ctxt) \
884 rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \
885 if (rc != X86EMUL_CONTINUE) \
887 ctxt->_eip += sizeof(_type); \
888 _x = *(_type __aligned(1) *) ctxt->fetch.ptr; \
889 ctxt->fetch.ptr += sizeof(_type); \
893 #define insn_fetch_arr(_arr, _size, _ctxt) \
895 rc = do_insn_fetch_bytes(_ctxt, _size); \
896 if (rc != X86EMUL_CONTINUE) \
898 ctxt->_eip += (_size); \
899 memcpy(_arr, ctxt->fetch.ptr, _size); \
900 ctxt->fetch.ptr += (_size); \
904 * Given the 'reg' portion of a ModRM byte, and a register block, return a
905 * pointer into the block that addresses the relevant register.
906 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
908 static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
912 int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;
914 if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
915 p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
917 p = reg_rmw(ctxt, modrm_reg);
921 static int read_descriptor(struct x86_emulate_ctxt *ctxt,
922 struct segmented_address addr,
923 u16 *size, unsigned long *address, int op_bytes)
930 rc = segmented_read_std(ctxt, addr, size, 2);
931 if (rc != X86EMUL_CONTINUE)
934 rc = segmented_read_std(ctxt, addr, address, op_bytes);
948 FASTOP1SRC2(mul, mul_ex);
949 FASTOP1SRC2(imul, imul_ex);
950 FASTOP1SRC2EX(div, div_ex);
951 FASTOP1SRC2EX(idiv, idiv_ex);
980 FASTOP2R(cmp, cmp_r);
982 static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
984 /* If src is zero, do not writeback, but update flags */
985 if (ctxt->src.val == 0)
986 ctxt->dst.type = OP_NONE;
987 return fastop(ctxt, em_bsf);
990 static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
992 /* If src is zero, do not writeback, but update flags */
993 if (ctxt->src.val == 0)
994 ctxt->dst.type = OP_NONE;
995 return fastop(ctxt, em_bsr);
998 static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
1001 void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);
1003 flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
1004 asm("push %[flags]; popf; call *%[fastop]"
1005 : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
1009 static void fetch_register_operand(struct operand *op)
1011 switch (op->bytes) {
1013 op->val = *(u8 *)op->addr.reg;
1016 op->val = *(u16 *)op->addr.reg;
1019 op->val = *(u32 *)op->addr.reg;
1022 op->val = *(u64 *)op->addr.reg;
1027 static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
1029 ctxt->ops->get_fpu(ctxt);
1031 case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
1032 case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
1033 case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break;
1034 case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break;
1035 case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break;
1036 case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break;
1037 case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break;
1038 case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break;
1039 #ifdef CONFIG_X86_64
1040 case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break;
1041 case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break;
1042 case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break;
1043 case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break;
1044 case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break;
1045 case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break;
1046 case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break;
1047 case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break;
1051 ctxt->ops->put_fpu(ctxt);
1054 static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
1057 ctxt->ops->get_fpu(ctxt);
1059 case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
1060 case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
1061 case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break;
1062 case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break;
1063 case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break;
1064 case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break;
1065 case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break;
1066 case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break;
1067 #ifdef CONFIG_X86_64
1068 case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break;
1069 case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break;
1070 case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break;
1071 case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break;
1072 case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break;
1073 case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break;
1074 case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break;
1075 case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break;
1079 ctxt->ops->put_fpu(ctxt);
1082 static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
1084 ctxt->ops->get_fpu(ctxt);
1086 case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
1087 case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
1088 case 2: asm("movq %%mm2, %0" : "=m"(*data)); break;
1089 case 3: asm("movq %%mm3, %0" : "=m"(*data)); break;
1090 case 4: asm("movq %%mm4, %0" : "=m"(*data)); break;
1091 case 5: asm("movq %%mm5, %0" : "=m"(*data)); break;
1092 case 6: asm("movq %%mm6, %0" : "=m"(*data)); break;
1093 case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
1096 ctxt->ops->put_fpu(ctxt);
1099 static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
1101 ctxt->ops->get_fpu(ctxt);
1103 case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
1104 case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
1105 case 2: asm("movq %0, %%mm2" : : "m"(*data)); break;
1106 case 3: asm("movq %0, %%mm3" : : "m"(*data)); break;
1107 case 4: asm("movq %0, %%mm4" : : "m"(*data)); break;
1108 case 5: asm("movq %0, %%mm5" : : "m"(*data)); break;
1109 case 6: asm("movq %0, %%mm6" : : "m"(*data)); break;
1110 case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
1113 ctxt->ops->put_fpu(ctxt);
1116 static int em_fninit(struct x86_emulate_ctxt *ctxt)
1118 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1119 return emulate_nm(ctxt);
1121 ctxt->ops->get_fpu(ctxt);
1122 asm volatile("fninit");
1123 ctxt->ops->put_fpu(ctxt);
1124 return X86EMUL_CONTINUE;
1127 static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
1131 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1132 return emulate_nm(ctxt);
1134 ctxt->ops->get_fpu(ctxt);
1135 asm volatile("fnstcw %0": "+m"(fcw));
1136 ctxt->ops->put_fpu(ctxt);
1138 ctxt->dst.val = fcw;
1140 return X86EMUL_CONTINUE;
1143 static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
1147 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
1148 return emulate_nm(ctxt);
1150 ctxt->ops->get_fpu(ctxt);
1151 asm volatile("fnstsw %0": "+m"(fsw));
1152 ctxt->ops->put_fpu(ctxt);
1154 ctxt->dst.val = fsw;
1156 return X86EMUL_CONTINUE;
1159 static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
1162 unsigned reg = ctxt->modrm_reg;
1164 if (!(ctxt->d & ModRM))
1165 reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);
1167 if (ctxt->d & Sse) {
1171 read_sse_reg(ctxt, &op->vec_val, reg);
1174 if (ctxt->d & Mmx) {
1183 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1184 op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);
1186 fetch_register_operand(op);
1187 op->orig_val = op->val;
1190 static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
1192 if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
1193 ctxt->modrm_seg = VCPU_SREG_SS;
1196 static int decode_modrm(struct x86_emulate_ctxt *ctxt,
1200 int index_reg, base_reg, scale;
1201 int rc = X86EMUL_CONTINUE;
1204 ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
1205 index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
1206 base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */
1208 ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
1209 ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
1210 ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
1211 ctxt->modrm_seg = VCPU_SREG_DS;
1213 if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
1215 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
1216 op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
1218 if (ctxt->d & Sse) {
1221 op->addr.xmm = ctxt->modrm_rm;
1222 read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm);
1225 if (ctxt->d & Mmx) {
1228 op->addr.mm = ctxt->modrm_rm & 7;
1231 fetch_register_operand(op);
1237 if (ctxt->ad_bytes == 2) {
1238 unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
1239 unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
1240 unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
1241 unsigned di = reg_read(ctxt, VCPU_REGS_RDI);
1243 /* 16-bit ModR/M decode. */
1244 switch (ctxt->modrm_mod) {
1246 if (ctxt->modrm_rm == 6)
1247 modrm_ea += insn_fetch(u16, ctxt);
1250 modrm_ea += insn_fetch(s8, ctxt);
1253 modrm_ea += insn_fetch(u16, ctxt);
1256 switch (ctxt->modrm_rm) {
1258 modrm_ea += bx + si;
1261 modrm_ea += bx + di;
1264 modrm_ea += bp + si;
1267 modrm_ea += bp + di;
1276 if (ctxt->modrm_mod != 0)
1283 if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
1284 (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
1285 ctxt->modrm_seg = VCPU_SREG_SS;
1286 modrm_ea = (u16)modrm_ea;
1288 /* 32/64-bit ModR/M decode. */
1289 if ((ctxt->modrm_rm & 7) == 4) {
1290 sib = insn_fetch(u8, ctxt);
1291 index_reg |= (sib >> 3) & 7;
1292 base_reg |= sib & 7;
1295 if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
1296 modrm_ea += insn_fetch(s32, ctxt);
1298 modrm_ea += reg_read(ctxt, base_reg);
1299 adjust_modrm_seg(ctxt, base_reg);
1300 /* Increment ESP on POP [ESP] */
1301 if ((ctxt->d & IncSP) &&
1302 base_reg == VCPU_REGS_RSP)
1303 modrm_ea += ctxt->op_bytes;
1306 modrm_ea += reg_read(ctxt, index_reg) << scale;
1307 } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
1308 modrm_ea += insn_fetch(s32, ctxt);
1309 if (ctxt->mode == X86EMUL_MODE_PROT64)
1310 ctxt->rip_relative = 1;
1312 base_reg = ctxt->modrm_rm;
1313 modrm_ea += reg_read(ctxt, base_reg);
1314 adjust_modrm_seg(ctxt, base_reg);
1316 switch (ctxt->modrm_mod) {
1318 modrm_ea += insn_fetch(s8, ctxt);
1321 modrm_ea += insn_fetch(s32, ctxt);
1325 op->addr.mem.ea = modrm_ea;
1326 if (ctxt->ad_bytes != 8)
1327 ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
1333 static int decode_abs(struct x86_emulate_ctxt *ctxt,
1336 int rc = X86EMUL_CONTINUE;
1339 switch (ctxt->ad_bytes) {
1341 op->addr.mem.ea = insn_fetch(u16, ctxt);
1344 op->addr.mem.ea = insn_fetch(u32, ctxt);
1347 op->addr.mem.ea = insn_fetch(u64, ctxt);
1354 static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
1358 if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
1359 mask = ~((long)ctxt->dst.bytes * 8 - 1);
1361 if (ctxt->src.bytes == 2)
1362 sv = (s16)ctxt->src.val & (s16)mask;
1363 else if (ctxt->src.bytes == 4)
1364 sv = (s32)ctxt->src.val & (s32)mask;
1366 sv = (s64)ctxt->src.val & (s64)mask;
1368 ctxt->dst.addr.mem.ea = address_mask(ctxt,
1369 ctxt->dst.addr.mem.ea + (sv >> 3));
1372 /* only subword offset */
1373 ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
1376 static int read_emulated(struct x86_emulate_ctxt *ctxt,
1377 unsigned long addr, void *dest, unsigned size)
1380 struct read_cache *mc = &ctxt->mem_read;
1382 if (mc->pos < mc->end)
1385 WARN_ON((mc->end + size) >= sizeof(mc->data));
1387 rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
1389 if (rc != X86EMUL_CONTINUE)
1395 memcpy(dest, mc->data + mc->pos, size);
1397 return X86EMUL_CONTINUE;
1400 static int segmented_read(struct x86_emulate_ctxt *ctxt,
1401 struct segmented_address addr,
1408 rc = linearize(ctxt, addr, size, false, &linear);
1409 if (rc != X86EMUL_CONTINUE)
1411 return read_emulated(ctxt, linear, data, size);
1414 static int segmented_write(struct x86_emulate_ctxt *ctxt,
1415 struct segmented_address addr,
1422 rc = linearize(ctxt, addr, size, true, &linear);
1423 if (rc != X86EMUL_CONTINUE)
1425 return ctxt->ops->write_emulated(ctxt, linear, data, size,
1429 static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
1430 struct segmented_address addr,
1431 const void *orig_data, const void *data,
1437 rc = linearize(ctxt, addr, size, true, &linear);
1438 if (rc != X86EMUL_CONTINUE)
1440 return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
1441 size, &ctxt->exception);
1444 static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1445 unsigned int size, unsigned short port,
1448 struct read_cache *rc = &ctxt->io_read;
1450 if (rc->pos == rc->end) { /* refill pio read ahead */
1451 unsigned int in_page, n;
1452 unsigned int count = ctxt->rep_prefix ?
1453 address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
1454 in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
1455 offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
1456 PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
1457 n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
1460 rc->pos = rc->end = 0;
1461 if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
1466 if (ctxt->rep_prefix && (ctxt->d & String) &&
1467 !(ctxt->eflags & X86_EFLAGS_DF)) {
1468 ctxt->dst.data = rc->data + rc->pos;
1469 ctxt->dst.type = OP_MEM_STR;
1470 ctxt->dst.count = (rc->end - rc->pos) / size;
1473 memcpy(dest, rc->data + rc->pos, size);
1479 static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
1480 u16 index, struct desc_struct *desc)
1485 ctxt->ops->get_idt(ctxt, &dt);
1487 if (dt.size < index * 8 + 7)
1488 return emulate_gp(ctxt, index << 3 | 0x2);
1490 addr = dt.address + index * 8;
1491 return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
1495 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1496 u16 selector, struct desc_ptr *dt)
1498 const struct x86_emulate_ops *ops = ctxt->ops;
1501 if (selector & 1 << 2) {
1502 struct desc_struct desc;
1505 memset (dt, 0, sizeof *dt);
1506 if (!ops->get_segment(ctxt, &sel, &desc, &base3,
1510 dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
1511 dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
1513 ops->get_gdt(ctxt, dt);
1516 static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
1517 u16 selector, ulong *desc_addr_p)
1520 u16 index = selector >> 3;
1523 get_descriptor_table_ptr(ctxt, selector, &dt);
1525 if (dt.size < index * 8 + 7)
1526 return emulate_gp(ctxt, selector & 0xfffc);
1528 addr = dt.address + index * 8;
1530 #ifdef CONFIG_X86_64
1531 if (addr >> 32 != 0) {
1534 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1535 if (!(efer & EFER_LMA))
1540 *desc_addr_p = addr;
1541 return X86EMUL_CONTINUE;
1544 /* allowed just for 8 bytes segments */
1545 static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1546 u16 selector, struct desc_struct *desc,
1551 rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
1552 if (rc != X86EMUL_CONTINUE)
1555 return ctxt->ops->read_std(ctxt, *desc_addr_p, desc, sizeof(*desc),
1559 /* allowed just for 8 bytes segments */
1560 static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1561 u16 selector, struct desc_struct *desc)
1566 rc = get_descriptor_ptr(ctxt, selector, &addr);
1567 if (rc != X86EMUL_CONTINUE)
1570 return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc,
1574 /* Does not support long mode */
1575 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1576 u16 selector, int seg, u8 cpl,
1577 enum x86_transfer_type transfer,
1578 struct desc_struct *desc)
1580 struct desc_struct seg_desc, old_desc;
1582 unsigned err_vec = GP_VECTOR;
1584 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
1590 memset(&seg_desc, 0, sizeof seg_desc);
1592 if (ctxt->mode == X86EMUL_MODE_REAL) {
1593 /* set real mode segment descriptor (keep limit etc. for
1595 ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
1596 set_desc_base(&seg_desc, selector << 4);
1598 } else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
1599 /* VM86 needs a clean new segment descriptor */
1600 set_desc_base(&seg_desc, selector << 4);
1601 set_desc_limit(&seg_desc, 0xffff);
1611 /* NULL selector is not valid for TR, CS and SS (except for long mode) */
1612 if ((seg == VCPU_SREG_CS
1613 || (seg == VCPU_SREG_SS
1614 && (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl))
1615 || seg == VCPU_SREG_TR)
1619 /* TR should be in GDT only */
1620 if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
1623 if (null_selector) /* for NULL selector skip all following checks */
1626 ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
1627 if (ret != X86EMUL_CONTINUE)
1630 err_code = selector & 0xfffc;
1631 err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
1634 /* can't load system descriptor into segment selector */
1635 if (seg <= VCPU_SREG_GS && !seg_desc.s) {
1636 if (transfer == X86_TRANSFER_CALL_JMP)
1637 return X86EMUL_UNHANDLEABLE;
1642 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
1651 * segment is not a writable data segment or segment
1652 * selector's RPL != CPL or segment selector's RPL != CPL
1654 if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
1658 if (!(seg_desc.type & 8))
1661 if (seg_desc.type & 4) {
1667 if (rpl > cpl || dpl != cpl)
1670 /* in long-mode d/b must be clear if l is set */
1671 if (seg_desc.d && seg_desc.l) {
1674 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
1675 if (efer & EFER_LMA)
1679 /* CS(RPL) <- CPL */
1680 selector = (selector & 0xfffc) | cpl;
1683 if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
1685 old_desc = seg_desc;
1686 seg_desc.type |= 2; /* busy */
1687 ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
1688 sizeof(seg_desc), &ctxt->exception);
1689 if (ret != X86EMUL_CONTINUE)
1692 case VCPU_SREG_LDTR:
1693 if (seg_desc.s || seg_desc.type != 2)
1696 default: /* DS, ES, FS, or GS */
1698 * segment is not a data or readable code segment or
1699 * ((segment is a data or nonconforming code segment)
1700 * and (both RPL and CPL > DPL))
1702 if ((seg_desc.type & 0xa) == 0x8 ||
1703 (((seg_desc.type & 0xc) != 0xc) &&
1704 (rpl > dpl && cpl > dpl)))
1710 /* mark segment as accessed */
1711 if (!(seg_desc.type & 1)) {
1713 ret = write_segment_descriptor(ctxt, selector,
1715 if (ret != X86EMUL_CONTINUE)
1718 } else if (ctxt->mode == X86EMUL_MODE_PROT64) {
1719 ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3,
1720 sizeof(base3), &ctxt->exception);
1721 if (ret != X86EMUL_CONTINUE)
1723 if (is_noncanonical_address(get_desc_base(&seg_desc) |
1724 ((u64)base3 << 32)))
1725 return emulate_gp(ctxt, 0);
1728 ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
1731 return X86EMUL_CONTINUE;
1733 return emulate_exception(ctxt, err_vec, err_code, true);
1736 static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1737 u16 selector, int seg)
1739 u8 cpl = ctxt->ops->cpl(ctxt);
1740 return __load_segment_descriptor(ctxt, selector, seg, cpl,
1741 X86_TRANSFER_NONE, NULL);
1744 static void write_register_operand(struct operand *op)
1746 return assign_register(op->addr.reg, op->val, op->bytes);
1749 static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
1753 write_register_operand(op);
1756 if (ctxt->lock_prefix)
1757 return segmented_cmpxchg(ctxt,
1763 return segmented_write(ctxt,
1769 return segmented_write(ctxt,
1772 op->bytes * op->count);
1775 write_sse_reg(ctxt, &op->vec_val, op->addr.xmm);
1778 write_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
1786 return X86EMUL_CONTINUE;
1789 static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
1791 struct segmented_address addr;
1793 rsp_increment(ctxt, -bytes);
1794 addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1795 addr.seg = VCPU_SREG_SS;
1797 return segmented_write(ctxt, addr, data, bytes);
1800 static int em_push(struct x86_emulate_ctxt *ctxt)
1802 /* Disable writeback. */
1803 ctxt->dst.type = OP_NONE;
1804 return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
1807 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1808 void *dest, int len)
1811 struct segmented_address addr;
1813 addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
1814 addr.seg = VCPU_SREG_SS;
1815 rc = segmented_read(ctxt, addr, dest, len);
1816 if (rc != X86EMUL_CONTINUE)
1819 rsp_increment(ctxt, len);
1823 static int em_pop(struct x86_emulate_ctxt *ctxt)
1825 return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1828 static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1829 void *dest, int len)
1832 unsigned long val, change_mask;
1833 int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
1834 int cpl = ctxt->ops->cpl(ctxt);
1836 rc = emulate_pop(ctxt, &val, len);
1837 if (rc != X86EMUL_CONTINUE)
1840 change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
1841 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
1842 X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
1843 X86_EFLAGS_AC | X86_EFLAGS_ID;
1845 switch(ctxt->mode) {
1846 case X86EMUL_MODE_PROT64:
1847 case X86EMUL_MODE_PROT32:
1848 case X86EMUL_MODE_PROT16:
1850 change_mask |= X86_EFLAGS_IOPL;
1852 change_mask |= X86_EFLAGS_IF;
1854 case X86EMUL_MODE_VM86:
1856 return emulate_gp(ctxt, 0);
1857 change_mask |= X86_EFLAGS_IF;
1859 default: /* real mode */
1860 change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
1864 *(unsigned long *)dest =
1865 (ctxt->eflags & ~change_mask) | (val & change_mask);
1870 static int em_popf(struct x86_emulate_ctxt *ctxt)
1872 ctxt->dst.type = OP_REG;
1873 ctxt->dst.addr.reg = &ctxt->eflags;
1874 ctxt->dst.bytes = ctxt->op_bytes;
1875 return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
1878 static int em_enter(struct x86_emulate_ctxt *ctxt)
1881 unsigned frame_size = ctxt->src.val;
1882 unsigned nesting_level = ctxt->src2.val & 31;
1886 return X86EMUL_UNHANDLEABLE;
1888 rbp = reg_read(ctxt, VCPU_REGS_RBP);
1889 rc = push(ctxt, &rbp, stack_size(ctxt));
1890 if (rc != X86EMUL_CONTINUE)
1892 assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
1894 assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
1895 reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
1897 return X86EMUL_CONTINUE;
1900 static int em_leave(struct x86_emulate_ctxt *ctxt)
1902 assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
1904 return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
1907 static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
1909 int seg = ctxt->src2.val;
1911 ctxt->src.val = get_segment_selector(ctxt, seg);
1912 if (ctxt->op_bytes == 4) {
1913 rsp_increment(ctxt, -2);
1917 return em_push(ctxt);
1920 static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
1922 int seg = ctxt->src2.val;
1923 unsigned long selector;
1926 rc = emulate_pop(ctxt, &selector, 2);
1927 if (rc != X86EMUL_CONTINUE)
1930 if (ctxt->modrm_reg == VCPU_SREG_SS)
1931 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
1932 if (ctxt->op_bytes > 2)
1933 rsp_increment(ctxt, ctxt->op_bytes - 2);
1935 rc = load_segment_descriptor(ctxt, (u16)selector, seg);
1939 static int em_pusha(struct x86_emulate_ctxt *ctxt)
1941 unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
1942 int rc = X86EMUL_CONTINUE;
1943 int reg = VCPU_REGS_RAX;
1945 while (reg <= VCPU_REGS_RDI) {
1946 (reg == VCPU_REGS_RSP) ?
1947 (ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));
1950 if (rc != X86EMUL_CONTINUE)
1959 static int em_pushf(struct x86_emulate_ctxt *ctxt)
1961 ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
1962 return em_push(ctxt);
1965 static int em_popa(struct x86_emulate_ctxt *ctxt)
1967 int rc = X86EMUL_CONTINUE;
1968 int reg = VCPU_REGS_RDI;
1971 while (reg >= VCPU_REGS_RAX) {
1972 if (reg == VCPU_REGS_RSP) {
1973 rsp_increment(ctxt, ctxt->op_bytes);
1977 rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
1978 if (rc != X86EMUL_CONTINUE)
1980 assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
1986 static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
1988 const struct x86_emulate_ops *ops = ctxt->ops;
1995 /* TODO: Add limit checks */
1996 ctxt->src.val = ctxt->eflags;
1998 if (rc != X86EMUL_CONTINUE)
2001 ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);
2003 ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
2005 if (rc != X86EMUL_CONTINUE)
2008 ctxt->src.val = ctxt->_eip;
2010 if (rc != X86EMUL_CONTINUE)
2013 ops->get_idt(ctxt, &dt);
2015 eip_addr = dt.address + (irq << 2);
2016 cs_addr = dt.address + (irq << 2) + 2;
2018 rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception);
2019 if (rc != X86EMUL_CONTINUE)
2022 rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception);
2023 if (rc != X86EMUL_CONTINUE)
2026 rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
2027 if (rc != X86EMUL_CONTINUE)
2035 int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
2039 invalidate_registers(ctxt);
2040 rc = __emulate_int_real(ctxt, irq);
2041 if (rc == X86EMUL_CONTINUE)
2042 writeback_registers(ctxt);
2046 static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
2048 switch(ctxt->mode) {
2049 case X86EMUL_MODE_REAL:
2050 return __emulate_int_real(ctxt, irq);
2051 case X86EMUL_MODE_VM86:
2052 case X86EMUL_MODE_PROT16:
2053 case X86EMUL_MODE_PROT32:
2054 case X86EMUL_MODE_PROT64:
2056 /* Protected mode interrupts unimplemented yet */
2057 return X86EMUL_UNHANDLEABLE;
2061 static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
2063 int rc = X86EMUL_CONTINUE;
2064 unsigned long temp_eip = 0;
2065 unsigned long temp_eflags = 0;
2066 unsigned long cs = 0;
2067 unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
2068 X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
2069 X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
2070 X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
2071 X86_EFLAGS_AC | X86_EFLAGS_ID |
2073 unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
2076 /* TODO: Add stack limit check */
2078 rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
2080 if (rc != X86EMUL_CONTINUE)
2083 if (temp_eip & ~0xffff)
2084 return emulate_gp(ctxt, 0);
2086 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2088 if (rc != X86EMUL_CONTINUE)
2091 rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
2093 if (rc != X86EMUL_CONTINUE)
2096 rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
2098 if (rc != X86EMUL_CONTINUE)
2101 ctxt->_eip = temp_eip;
2103 if (ctxt->op_bytes == 4)
2104 ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
2105 else if (ctxt->op_bytes == 2) {
2106 ctxt->eflags &= ~0xffff;
2107 ctxt->eflags |= temp_eflags;
2110 ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
2111 ctxt->eflags |= X86_EFLAGS_FIXED;
2112 ctxt->ops->set_nmi_mask(ctxt, false);
2117 static int em_iret(struct x86_emulate_ctxt *ctxt)
2119 switch(ctxt->mode) {
2120 case X86EMUL_MODE_REAL:
2121 return emulate_iret_real(ctxt);
2122 case X86EMUL_MODE_VM86:
2123 case X86EMUL_MODE_PROT16:
2124 case X86EMUL_MODE_PROT32:
2125 case X86EMUL_MODE_PROT64:
2127 /* iret from protected mode unimplemented yet */
2128 return X86EMUL_UNHANDLEABLE;
2132 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
2136 struct desc_struct new_desc;
2137 u8 cpl = ctxt->ops->cpl(ctxt);
2139 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2141 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
2142 X86_TRANSFER_CALL_JMP,
2144 if (rc != X86EMUL_CONTINUE)
2147 rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
2148 /* Error handling is not implemented. */
2149 if (rc != X86EMUL_CONTINUE)
2150 return X86EMUL_UNHANDLEABLE;
2155 static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
2157 return assign_eip_near(ctxt, ctxt->src.val);
2160 static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
2165 old_eip = ctxt->_eip;
2166 rc = assign_eip_near(ctxt, ctxt->src.val);
2167 if (rc != X86EMUL_CONTINUE)
2169 ctxt->src.val = old_eip;
2174 static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
2176 u64 old = ctxt->dst.orig_val64;
2178 if (ctxt->dst.bytes == 16)
2179 return X86EMUL_UNHANDLEABLE;
2181 if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
2182 ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
2183 *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
2184 *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
2185 ctxt->eflags &= ~X86_EFLAGS_ZF;
2187 ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
2188 (u32) reg_read(ctxt, VCPU_REGS_RBX);
2190 ctxt->eflags |= X86_EFLAGS_ZF;
2192 return X86EMUL_CONTINUE;
2195 static int em_ret(struct x86_emulate_ctxt *ctxt)
2200 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2201 if (rc != X86EMUL_CONTINUE)
2204 return assign_eip_near(ctxt, eip);
2207 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2210 unsigned long eip, cs;
2211 int cpl = ctxt->ops->cpl(ctxt);
2212 struct desc_struct new_desc;
2214 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2215 if (rc != X86EMUL_CONTINUE)
2217 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2218 if (rc != X86EMUL_CONTINUE)
2220 /* Outer-privilege level return is not implemented */
2221 if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
2222 return X86EMUL_UNHANDLEABLE;
2223 rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
2226 if (rc != X86EMUL_CONTINUE)
2228 rc = assign_eip_far(ctxt, eip, &new_desc);
2229 /* Error handling is not implemented. */
2230 if (rc != X86EMUL_CONTINUE)
2231 return X86EMUL_UNHANDLEABLE;
2236 static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
2240 rc = em_ret_far(ctxt);
2241 if (rc != X86EMUL_CONTINUE)
2243 rsp_increment(ctxt, ctxt->src.val);
2244 return X86EMUL_CONTINUE;
2247 static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
2249 /* Save real source value, then compare EAX against destination. */
2250 ctxt->dst.orig_val = ctxt->dst.val;
2251 ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
2252 ctxt->src.orig_val = ctxt->src.val;
2253 ctxt->src.val = ctxt->dst.orig_val;
2254 fastop(ctxt, em_cmp);
2256 if (ctxt->eflags & X86_EFLAGS_ZF) {
2257 /* Success: write back to memory; no update of EAX */
2258 ctxt->src.type = OP_NONE;
2259 ctxt->dst.val = ctxt->src.orig_val;
2261 /* Failure: write the value we saw to EAX. */
2262 ctxt->src.type = OP_REG;
2263 ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
2264 ctxt->src.val = ctxt->dst.orig_val;
2265 /* Create write-cycle to dest by writing the same value */
2266 ctxt->dst.val = ctxt->dst.orig_val;
2268 return X86EMUL_CONTINUE;
2271 static int em_lseg(struct x86_emulate_ctxt *ctxt)
2273 int seg = ctxt->src2.val;
2277 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2279 rc = load_segment_descriptor(ctxt, sel, seg);
2280 if (rc != X86EMUL_CONTINUE)
2283 ctxt->dst.val = ctxt->src.val;
2287 static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
2289 u32 eax, ebx, ecx, edx;
2293 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
2294 return edx & bit(X86_FEATURE_LM);
2297 #define GET_SMSTATE(type, smbase, offset) \
2300 int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val, \
2302 if (r != X86EMUL_CONTINUE) \
2303 return X86EMUL_UNHANDLEABLE; \
2307 static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
2309 desc->g = (flags >> 23) & 1;
2310 desc->d = (flags >> 22) & 1;
2311 desc->l = (flags >> 21) & 1;
2312 desc->avl = (flags >> 20) & 1;
2313 desc->p = (flags >> 15) & 1;
2314 desc->dpl = (flags >> 13) & 3;
2315 desc->s = (flags >> 12) & 1;
2316 desc->type = (flags >> 8) & 15;
2319 static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
2321 struct desc_struct desc;
2325 selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4);
2328 offset = 0x7f84 + n * 12;
2330 offset = 0x7f2c + (n - 3) * 12;
2332 set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
2333 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
2334 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset));
2335 ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
2336 return X86EMUL_CONTINUE;
2339 static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
2341 struct desc_struct desc;
2346 offset = 0x7e00 + n * 16;
2348 selector = GET_SMSTATE(u16, smbase, offset);
2349 rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8);
2350 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
2351 set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
2352 base3 = GET_SMSTATE(u32, smbase, offset + 12);
2354 ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
2355 return X86EMUL_CONTINUE;
2358 static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
2364 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
2365 * Then enable protected mode. However, PCID cannot be enabled
2366 * if EFER.LMA=0, so set it separately.
2368 bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
2370 return X86EMUL_UNHANDLEABLE;
2372 bad = ctxt->ops->set_cr(ctxt, 0, cr0);
2374 return X86EMUL_UNHANDLEABLE;
2376 if (cr4 & X86_CR4_PCIDE) {
2377 bad = ctxt->ops->set_cr(ctxt, 4, cr4);
2379 return X86EMUL_UNHANDLEABLE;
2382 return X86EMUL_CONTINUE;
2385 static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
2387 struct desc_struct desc;
2393 cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
2394 ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
2395 ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
2396 ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
2398 for (i = 0; i < 8; i++)
2399 *reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4);
2401 val = GET_SMSTATE(u32, smbase, 0x7fcc);
2402 ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
2403 val = GET_SMSTATE(u32, smbase, 0x7fc8);
2404 ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
2406 selector = GET_SMSTATE(u32, smbase, 0x7fc4);
2407 set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f64));
2408 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f60));
2409 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f5c));
2410 ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
2412 selector = GET_SMSTATE(u32, smbase, 0x7fc0);
2413 set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f80));
2414 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f7c));
2415 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f78));
2416 ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
2418 dt.address = GET_SMSTATE(u32, smbase, 0x7f74);
2419 dt.size = GET_SMSTATE(u32, smbase, 0x7f70);
2420 ctxt->ops->set_gdt(ctxt, &dt);
2422 dt.address = GET_SMSTATE(u32, smbase, 0x7f58);
2423 dt.size = GET_SMSTATE(u32, smbase, 0x7f54);
2424 ctxt->ops->set_idt(ctxt, &dt);
2426 for (i = 0; i < 6; i++) {
2427 int r = rsm_load_seg_32(ctxt, smbase, i);
2428 if (r != X86EMUL_CONTINUE)
2432 cr4 = GET_SMSTATE(u32, smbase, 0x7f14);
2434 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
2436 return rsm_enter_protected_mode(ctxt, cr0, cr4);
2439 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
2441 struct desc_struct desc;
2448 for (i = 0; i < 16; i++)
2449 *reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);
2451 ctxt->_eip = GET_SMSTATE(u64, smbase, 0x7f78);
2452 ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED;
2454 val = GET_SMSTATE(u32, smbase, 0x7f68);
2455 ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
2456 val = GET_SMSTATE(u32, smbase, 0x7f60);
2457 ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
2459 cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
2460 ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50));
2461 cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
2462 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
2463 val = GET_SMSTATE(u64, smbase, 0x7ed0);
2464 ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);
2466 selector = GET_SMSTATE(u32, smbase, 0x7e90);
2467 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e92) << 8);
2468 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e94));
2469 set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e98));
2470 base3 = GET_SMSTATE(u32, smbase, 0x7e9c);
2471 ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
2473 dt.size = GET_SMSTATE(u32, smbase, 0x7e84);
2474 dt.address = GET_SMSTATE(u64, smbase, 0x7e88);
2475 ctxt->ops->set_idt(ctxt, &dt);
2477 selector = GET_SMSTATE(u32, smbase, 0x7e70);
2478 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e72) << 8);
2479 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e74));
2480 set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e78));
2481 base3 = GET_SMSTATE(u32, smbase, 0x7e7c);
2482 ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
2484 dt.size = GET_SMSTATE(u32, smbase, 0x7e64);
2485 dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
2486 ctxt->ops->set_gdt(ctxt, &dt);
2488 r = rsm_enter_protected_mode(ctxt, cr0, cr4);
2489 if (r != X86EMUL_CONTINUE)
2492 for (i = 0; i < 6; i++) {
2493 r = rsm_load_seg_64(ctxt, smbase, i);
2494 if (r != X86EMUL_CONTINUE)
2498 return X86EMUL_CONTINUE;
2501 static int em_rsm(struct x86_emulate_ctxt *ctxt)
2503 unsigned long cr0, cr4, efer;
2507 if ((ctxt->emul_flags & X86EMUL_SMM_MASK) == 0)
2508 return emulate_ud(ctxt);
2511 * Get back to real mode, to prepare a safe state in which to load
2512 * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
2513 * supports long mode.
2515 cr4 = ctxt->ops->get_cr(ctxt, 4);
2516 if (emulator_has_longmode(ctxt)) {
2517 struct desc_struct cs_desc;
2519 /* Zero CR4.PCIDE before CR0.PG. */
2520 if (cr4 & X86_CR4_PCIDE) {
2521 ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
2522 cr4 &= ~X86_CR4_PCIDE;
2525 /* A 32-bit code segment is required to clear EFER.LMA. */
2526 memset(&cs_desc, 0, sizeof(cs_desc));
2528 cs_desc.s = cs_desc.g = cs_desc.p = 1;
2529 ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
2532 /* For the 64-bit case, this will clear EFER.LMA. */
2533 cr0 = ctxt->ops->get_cr(ctxt, 0);
2534 if (cr0 & X86_CR0_PE)
2535 ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
2537 /* Now clear CR4.PAE (which must be done before clearing EFER.LME). */
2538 if (cr4 & X86_CR4_PAE)
2539 ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
2541 /* And finally go back to 32-bit mode. */
2543 ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
2545 smbase = ctxt->ops->get_smbase(ctxt);
2546 if (emulator_has_longmode(ctxt))
2547 ret = rsm_load_state_64(ctxt, smbase + 0x8000);
2549 ret = rsm_load_state_32(ctxt, smbase + 0x8000);
2551 if (ret != X86EMUL_CONTINUE) {
2552 /* FIXME: should triple fault */
2553 return X86EMUL_UNHANDLEABLE;
2556 if ((ctxt->emul_flags & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
2557 ctxt->ops->set_nmi_mask(ctxt, false);
2559 ctxt->emul_flags &= ~X86EMUL_SMM_INSIDE_NMI_MASK;
2560 ctxt->emul_flags &= ~X86EMUL_SMM_MASK;
2561 return X86EMUL_CONTINUE;
2565 setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
2566 struct desc_struct *cs, struct desc_struct *ss)
2568 cs->l = 0; /* will be adjusted later */
2569 set_desc_base(cs, 0); /* flat segment */
2570 cs->g = 1; /* 4kb granularity */
2571 set_desc_limit(cs, 0xfffff); /* 4GB limit */
2572 cs->type = 0x0b; /* Read, Execute, Accessed */
2574 cs->dpl = 0; /* will be adjusted later */
2579 set_desc_base(ss, 0); /* flat segment */
2580 set_desc_limit(ss, 0xfffff); /* 4GB limit */
2581 ss->g = 1; /* 4kb granularity */
2583 ss->type = 0x03; /* Read/Write, Accessed */
2584 ss->d = 1; /* 32bit stack segment */
2591 static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
2593 u32 eax, ebx, ecx, edx;
2596 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
2597 return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx
2598 && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx
2599 && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx;
2602 static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
2604 const struct x86_emulate_ops *ops = ctxt->ops;
2605 u32 eax, ebx, ecx, edx;
2608 * syscall should always be enabled in longmode - so only become
2609 * vendor specific (cpuid) if other modes are active...
2611 if (ctxt->mode == X86EMUL_MODE_PROT64)
2616 ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
2618 * Intel ("GenuineIntel")
2619 * remark: Intel CPUs only support "syscall" in 64bit
2620 * longmode. Also an 64bit guest with a
2621 * 32bit compat-app running will #UD !! While this
2622 * behaviour can be fixed (by emulating) into AMD
2623 * response - CPUs of AMD can't behave like Intel.
2625 if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx &&
2626 ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx &&
2627 edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx)
2630 /* AMD ("AuthenticAMD") */
2631 if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx &&
2632 ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx &&
2633 edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
2636 /* AMD ("AMDisbetter!") */
2637 if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx &&
2638 ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx &&
2639 edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx)
2642 /* default: (not Intel, not AMD), apply Intel's stricter rules... */
2646 static int em_syscall(struct x86_emulate_ctxt *ctxt)
2648 const struct x86_emulate_ops *ops = ctxt->ops;
2649 struct desc_struct cs, ss;
2654 /* syscall is not available in real mode */
2655 if (ctxt->mode == X86EMUL_MODE_REAL ||
2656 ctxt->mode == X86EMUL_MODE_VM86)
2657 return emulate_ud(ctxt);
2659 if (!(em_syscall_is_enabled(ctxt)))
2660 return emulate_ud(ctxt);
2662 ops->get_msr(ctxt, MSR_EFER, &efer);
2663 setup_syscalls_segments(ctxt, &cs, &ss);
2665 if (!(efer & EFER_SCE))
2666 return emulate_ud(ctxt);
2668 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2670 cs_sel = (u16)(msr_data & 0xfffc);
2671 ss_sel = (u16)(msr_data + 8);
2673 if (efer & EFER_LMA) {
2677 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2678 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2680 *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
2681 if (efer & EFER_LMA) {
2682 #ifdef CONFIG_X86_64
2683 *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
2686 ctxt->mode == X86EMUL_MODE_PROT64 ?
2687 MSR_LSTAR : MSR_CSTAR, &msr_data);
2688 ctxt->_eip = msr_data;
2690 ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
2691 ctxt->eflags &= ~msr_data;
2692 ctxt->eflags |= X86_EFLAGS_FIXED;
2696 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2697 ctxt->_eip = (u32)msr_data;
2699 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2702 return X86EMUL_CONTINUE;
2705 static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2707 const struct x86_emulate_ops *ops = ctxt->ops;
2708 struct desc_struct cs, ss;
2713 ops->get_msr(ctxt, MSR_EFER, &efer);
2714 /* inject #GP if in real mode */
2715 if (ctxt->mode == X86EMUL_MODE_REAL)
2716 return emulate_gp(ctxt, 0);
2719 * Not recognized on AMD in compat mode (but is recognized in legacy
2722 if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
2723 && !vendor_intel(ctxt))
2724 return emulate_ud(ctxt);
2726 /* sysenter/sysexit have not been tested in 64bit mode. */
2727 if (ctxt->mode == X86EMUL_MODE_PROT64)
2728 return X86EMUL_UNHANDLEABLE;
2730 setup_syscalls_segments(ctxt, &cs, &ss);
2732 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2733 if ((msr_data & 0xfffc) == 0x0)
2734 return emulate_gp(ctxt, 0);
2736 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2737 cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
2738 ss_sel = cs_sel + 8;
2739 if (efer & EFER_LMA) {
2744 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2745 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2747 ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
2748 ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
2750 ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
2751 *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
2754 return X86EMUL_CONTINUE;
2757 static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2759 const struct x86_emulate_ops *ops = ctxt->ops;
2760 struct desc_struct cs, ss;
2761 u64 msr_data, rcx, rdx;
2763 u16 cs_sel = 0, ss_sel = 0;
2765 /* inject #GP if in real mode or Virtual 8086 mode */
2766 if (ctxt->mode == X86EMUL_MODE_REAL ||
2767 ctxt->mode == X86EMUL_MODE_VM86)
2768 return emulate_gp(ctxt, 0);
2770 setup_syscalls_segments(ctxt, &cs, &ss);
2772 if ((ctxt->rex_prefix & 0x8) != 0x0)
2773 usermode = X86EMUL_MODE_PROT64;
2775 usermode = X86EMUL_MODE_PROT32;
2777 rcx = reg_read(ctxt, VCPU_REGS_RCX);
2778 rdx = reg_read(ctxt, VCPU_REGS_RDX);
2782 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2784 case X86EMUL_MODE_PROT32:
2785 cs_sel = (u16)(msr_data + 16);
2786 if ((msr_data & 0xfffc) == 0x0)
2787 return emulate_gp(ctxt, 0);
2788 ss_sel = (u16)(msr_data + 24);
2792 case X86EMUL_MODE_PROT64:
2793 cs_sel = (u16)(msr_data + 32);
2794 if (msr_data == 0x0)
2795 return emulate_gp(ctxt, 0);
2796 ss_sel = cs_sel + 8;
2799 if (is_noncanonical_address(rcx) ||
2800 is_noncanonical_address(rdx))
2801 return emulate_gp(ctxt, 0);
2804 cs_sel |= SEGMENT_RPL_MASK;
2805 ss_sel |= SEGMENT_RPL_MASK;
2807 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2808 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2811 *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2813 return X86EMUL_CONTINUE;
2816 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2819 if (ctxt->mode == X86EMUL_MODE_REAL)
2821 if (ctxt->mode == X86EMUL_MODE_VM86)
2823 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
2824 return ctxt->ops->cpl(ctxt) > iopl;
2827 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2830 const struct x86_emulate_ops *ops = ctxt->ops;
2831 struct desc_struct tr_seg;
2834 u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2835 unsigned mask = (1 << len) - 1;
2838 ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
2841 if (desc_limit_scaled(&tr_seg) < 103)
2843 base = get_desc_base(&tr_seg);
2844 #ifdef CONFIG_X86_64
2845 base |= ((u64)base3) << 32;
2847 r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL);
2848 if (r != X86EMUL_CONTINUE)
2850 if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2852 r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL);
2853 if (r != X86EMUL_CONTINUE)
2855 if ((perm >> bit_idx) & mask)
2860 static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
2866 if (emulator_bad_iopl(ctxt))
2867 if (!emulator_io_port_access_allowed(ctxt, port, len))
2870 ctxt->perm_ok = true;
2875 static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
2878 * Intel CPUs mask the counter and pointers in quite strange
2879 * manner when ECX is zero due to REP-string optimizations.
2881 #ifdef CONFIG_X86_64
2882 if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
2885 *reg_write(ctxt, VCPU_REGS_RCX) = 0;
2888 case 0xa4: /* movsb */
2889 case 0xa5: /* movsd/w */
2890 *reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
2892 case 0xaa: /* stosb */
2893 case 0xab: /* stosd/w */
2894 *reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
2899 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2900 struct tss_segment_16 *tss)
2902 tss->ip = ctxt->_eip;
2903 tss->flag = ctxt->eflags;
2904 tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
2905 tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
2906 tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
2907 tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
2908 tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
2909 tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
2910 tss->si = reg_read(ctxt, VCPU_REGS_RSI);
2911 tss->di = reg_read(ctxt, VCPU_REGS_RDI);
2913 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2914 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2915 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2916 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2917 tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
2920 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2921 struct tss_segment_16 *tss)
2926 ctxt->_eip = tss->ip;
2927 ctxt->eflags = tss->flag | 2;
2928 *reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
2929 *reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
2930 *reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
2931 *reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
2932 *reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
2933 *reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
2934 *reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
2935 *reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
2938 * SDM says that segment selectors are loaded before segment
2941 set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
2942 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2943 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2944 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2945 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2950 * Now load segment descriptors. If fault happens at this stage
2951 * it is handled in a context of new task
2953 ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
2954 X86_TRANSFER_TASK_SWITCH, NULL);
2955 if (ret != X86EMUL_CONTINUE)
2957 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2958 X86_TRANSFER_TASK_SWITCH, NULL);
2959 if (ret != X86EMUL_CONTINUE)
2961 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2962 X86_TRANSFER_TASK_SWITCH, NULL);
2963 if (ret != X86EMUL_CONTINUE)
2965 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2966 X86_TRANSFER_TASK_SWITCH, NULL);
2967 if (ret != X86EMUL_CONTINUE)
2969 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2970 X86_TRANSFER_TASK_SWITCH, NULL);
2971 if (ret != X86EMUL_CONTINUE)
2974 return X86EMUL_CONTINUE;
2977 static int task_switch_16(struct x86_emulate_ctxt *ctxt,
2978 u16 tss_selector, u16 old_tss_sel,
2979 ulong old_tss_base, struct desc_struct *new_desc)
2981 const struct x86_emulate_ops *ops = ctxt->ops;
2982 struct tss_segment_16 tss_seg;
2984 u32 new_tss_base = get_desc_base(new_desc);
2986 ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
2988 if (ret != X86EMUL_CONTINUE)
2991 save_state_to_tss16(ctxt, &tss_seg);
2993 ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
2995 if (ret != X86EMUL_CONTINUE)
2998 ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
3000 if (ret != X86EMUL_CONTINUE)
3003 if (old_tss_sel != 0xffff) {
3004 tss_seg.prev_task_link = old_tss_sel;
3006 ret = ops->write_std(ctxt, new_tss_base,
3007 &tss_seg.prev_task_link,
3008 sizeof tss_seg.prev_task_link,
3010 if (ret != X86EMUL_CONTINUE)
3014 return load_state_from_tss16(ctxt, &tss_seg);
3017 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
3018 struct tss_segment_32 *tss)
3020 /* CR3 and ldt selector are not saved intentionally */
3021 tss->eip = ctxt->_eip;
3022 tss->eflags = ctxt->eflags;
3023 tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
3024 tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
3025 tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
3026 tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
3027 tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
3028 tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
3029 tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
3030 tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
3032 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
3033 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
3034 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
3035 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
3036 tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
3037 tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
3040 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
3041 struct tss_segment_32 *tss)
3046 if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
3047 return emulate_gp(ctxt, 0);
3048 ctxt->_eip = tss->eip;
3049 ctxt->eflags = tss->eflags | 2;
3051 /* General purpose registers */
3052 *reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
3053 *reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
3054 *reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
3055 *reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
3056 *reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
3057 *reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
3058 *reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
3059 *reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
3062 * SDM says that segment selectors are loaded before segment
3063 * descriptors. This is important because CPL checks will
3066 set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
3067 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
3068 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
3069 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
3070 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
3071 set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
3072 set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
3075 * If we're switching between Protected Mode and VM86, we need to make
3076 * sure to update the mode before loading the segment descriptors so
3077 * that the selectors are interpreted correctly.
3079 if (ctxt->eflags & X86_EFLAGS_VM) {
3080 ctxt->mode = X86EMUL_MODE_VM86;
3083 ctxt->mode = X86EMUL_MODE_PROT32;
3088 * Now load segment descriptors. If fault happenes at this stage
3089 * it is handled in a context of new task
3091 ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
3092 cpl, X86_TRANSFER_TASK_SWITCH, NULL);
3093 if (ret != X86EMUL_CONTINUE)
3095 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
3096 X86_TRANSFER_TASK_SWITCH, NULL);
3097 if (ret != X86EMUL_CONTINUE)
3099 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
3100 X86_TRANSFER_TASK_SWITCH, NULL);
3101 if (ret != X86EMUL_CONTINUE)
3103 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
3104 X86_TRANSFER_TASK_SWITCH, NULL);
3105 if (ret != X86EMUL_CONTINUE)
3107 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
3108 X86_TRANSFER_TASK_SWITCH, NULL);
3109 if (ret != X86EMUL_CONTINUE)
3111 ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
3112 X86_TRANSFER_TASK_SWITCH, NULL);
3113 if (ret != X86EMUL_CONTINUE)
3115 ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
3116 X86_TRANSFER_TASK_SWITCH, NULL);
3121 static int task_switch_32(struct x86_emulate_ctxt *ctxt,
3122 u16 tss_selector, u16 old_tss_sel,
3123 ulong old_tss_base, struct desc_struct *new_desc)
3125 const struct x86_emulate_ops *ops = ctxt->ops;
3126 struct tss_segment_32 tss_seg;
3128 u32 new_tss_base = get_desc_base(new_desc);
3129 u32 eip_offset = offsetof(struct tss_segment_32, eip);
3130 u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
3132 ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
3134 if (ret != X86EMUL_CONTINUE)
3137 save_state_to_tss32(ctxt, &tss_seg);
3139 /* Only GP registers and segment selectors are saved */
3140 ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
3141 ldt_sel_offset - eip_offset, &ctxt->exception);
3142 if (ret != X86EMUL_CONTINUE)
3145 ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
3147 if (ret != X86EMUL_CONTINUE)
3150 if (old_tss_sel != 0xffff) {
3151 tss_seg.prev_task_link = old_tss_sel;
3153 ret = ops->write_std(ctxt, new_tss_base,
3154 &tss_seg.prev_task_link,
3155 sizeof tss_seg.prev_task_link,
3157 if (ret != X86EMUL_CONTINUE)
3161 return load_state_from_tss32(ctxt, &tss_seg);
3164 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
3165 u16 tss_selector, int idt_index, int reason,
3166 bool has_error_code, u32 error_code)
3168 const struct x86_emulate_ops *ops = ctxt->ops;
3169 struct desc_struct curr_tss_desc, next_tss_desc;
3171 u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
3172 ulong old_tss_base =
3173 ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
3175 ulong desc_addr, dr7;
3177 /* FIXME: old_tss_base == ~0 ? */
3179 ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
3180 if (ret != X86EMUL_CONTINUE)
3182 ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
3183 if (ret != X86EMUL_CONTINUE)
3186 /* FIXME: check that next_tss_desc is tss */
3189 * Check privileges. The three cases are task switch caused by...
3191 * 1. jmp/call/int to task gate: Check against DPL of the task gate
3192 * 2. Exception/IRQ/iret: No check is performed
3193 * 3. jmp/call to TSS/task-gate: No check is performed since the
3194 * hardware checks it before exiting.
3196 if (reason == TASK_SWITCH_GATE) {
3197 if (idt_index != -1) {
3198 /* Software interrupts */
3199 struct desc_struct task_gate_desc;
3202 ret = read_interrupt_descriptor(ctxt, idt_index,
3204 if (ret != X86EMUL_CONTINUE)
3207 dpl = task_gate_desc.dpl;
3208 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
3209 return emulate_gp(ctxt, (idt_index << 3) | 0x2);
3213 desc_limit = desc_limit_scaled(&next_tss_desc);
3214 if (!next_tss_desc.p ||
3215 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
3216 desc_limit < 0x2b)) {
3217 return emulate_ts(ctxt, tss_selector & 0xfffc);
3220 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
3221 curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
3222 write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
3225 if (reason == TASK_SWITCH_IRET)
3226 ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
3228 /* set back link to prev task only if NT bit is set in eflags
3229 note that old_tss_sel is not used after this point */
3230 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
3231 old_tss_sel = 0xffff;
3233 if (next_tss_desc.type & 8)
3234 ret = task_switch_32(ctxt, tss_selector, old_tss_sel,
3235 old_tss_base, &next_tss_desc);
3237 ret = task_switch_16(ctxt, tss_selector, old_tss_sel,
3238 old_tss_base, &next_tss_desc);
3239 if (ret != X86EMUL_CONTINUE)
3242 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
3243 ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
3245 if (reason != TASK_SWITCH_IRET) {
3246 next_tss_desc.type |= (1 << 1); /* set busy flag */
3247 write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
3250 ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS);
3251 ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
3253 if (has_error_code) {
3254 ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
3255 ctxt->lock_prefix = 0;
3256 ctxt->src.val = (unsigned long) error_code;
3257 ret = em_push(ctxt);
3260 ops->get_dr(ctxt, 7, &dr7);
3261 ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
3266 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
3267 u16 tss_selector, int idt_index, int reason,
3268 bool has_error_code, u32 error_code)
3272 invalidate_registers(ctxt);
3273 ctxt->_eip = ctxt->eip;
3274 ctxt->dst.type = OP_NONE;
3276 rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
3277 has_error_code, error_code);
3279 if (rc == X86EMUL_CONTINUE) {
3280 ctxt->eip = ctxt->_eip;
3281 writeback_registers(ctxt);
3284 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3287 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
3290 int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
3292 register_address_increment(ctxt, reg, df * op->bytes);
3293 op->addr.mem.ea = register_address(ctxt, reg);
3296 static int em_das(struct x86_emulate_ctxt *ctxt)
3299 bool af, cf, old_cf;
3301 cf = ctxt->eflags & X86_EFLAGS_CF;
3307 af = ctxt->eflags & X86_EFLAGS_AF;
3308 if ((al & 0x0f) > 9 || af) {
3310 cf = old_cf | (al >= 250);
3315 if (old_al > 0x99 || old_cf) {
3321 /* Set PF, ZF, SF */
3322 ctxt->src.type = OP_IMM;
3324 ctxt->src.bytes = 1;
3325 fastop(ctxt, em_or);
3326 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
3328 ctxt->eflags |= X86_EFLAGS_CF;
3330 ctxt->eflags |= X86_EFLAGS_AF;
3331 return X86EMUL_CONTINUE;
3334 static int em_aam(struct x86_emulate_ctxt *ctxt)
3338 if (ctxt->src.val == 0)
3339 return emulate_de(ctxt);
3341 al = ctxt->dst.val & 0xff;
3342 ah = al / ctxt->src.val;
3343 al %= ctxt->src.val;
3345 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
3347 /* Set PF, ZF, SF */
3348 ctxt->src.type = OP_IMM;
3350 ctxt->src.bytes = 1;
3351 fastop(ctxt, em_or);
3353 return X86EMUL_CONTINUE;
3356 static int em_aad(struct x86_emulate_ctxt *ctxt)
3358 u8 al = ctxt->dst.val & 0xff;
3359 u8 ah = (ctxt->dst.val >> 8) & 0xff;
3361 al = (al + (ah * ctxt->src.val)) & 0xff;
3363 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
3365 /* Set PF, ZF, SF */
3366 ctxt->src.type = OP_IMM;
3368 ctxt->src.bytes = 1;
3369 fastop(ctxt, em_or);
3371 return X86EMUL_CONTINUE;
3374 static int em_call(struct x86_emulate_ctxt *ctxt)
3377 long rel = ctxt->src.val;
3379 ctxt->src.val = (unsigned long)ctxt->_eip;
3380 rc = jmp_rel(ctxt, rel);
3381 if (rc != X86EMUL_CONTINUE)
3383 return em_push(ctxt);
3386 static int em_call_far(struct x86_emulate_ctxt *ctxt)
3391 struct desc_struct old_desc, new_desc;
3392 const struct x86_emulate_ops *ops = ctxt->ops;
3393 int cpl = ctxt->ops->cpl(ctxt);
3394 enum x86emul_mode prev_mode = ctxt->mode;
3396 old_eip = ctxt->_eip;
3397 ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3399 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3400 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
3401 X86_TRANSFER_CALL_JMP, &new_desc);
3402 if (rc != X86EMUL_CONTINUE)
3405 rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
3406 if (rc != X86EMUL_CONTINUE)
3409 ctxt->src.val = old_cs;
3411 if (rc != X86EMUL_CONTINUE)
3414 ctxt->src.val = old_eip;
3416 /* If we failed, we tainted the memory, but the very least we should
3418 if (rc != X86EMUL_CONTINUE) {
3419 pr_warn_once("faulting far call emulation tainted memory\n");
3424 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3425 ctxt->mode = prev_mode;
3430 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
3435 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
3436 if (rc != X86EMUL_CONTINUE)
3438 rc = assign_eip_near(ctxt, eip);
3439 if (rc != X86EMUL_CONTINUE)
3441 rsp_increment(ctxt, ctxt->src.val);
3442 return X86EMUL_CONTINUE;
3445 static int em_xchg(struct x86_emulate_ctxt *ctxt)
3447 /* Write back the register source. */
3448 ctxt->src.val = ctxt->dst.val;
3449 write_register_operand(&ctxt->src);
3451 /* Write back the memory destination with implicit LOCK prefix. */
3452 ctxt->dst.val = ctxt->src.orig_val;
3453 ctxt->lock_prefix = 1;
3454 return X86EMUL_CONTINUE;
3457 static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
3459 ctxt->dst.val = ctxt->src2.val;
3460 return fastop(ctxt, em_imul);
3463 static int em_cwd(struct x86_emulate_ctxt *ctxt)
3465 ctxt->dst.type = OP_REG;
3466 ctxt->dst.bytes = ctxt->src.bytes;
3467 ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3468 ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3470 return X86EMUL_CONTINUE;
3473 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
3477 ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
3478 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
3479 *reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
3480 return X86EMUL_CONTINUE;
3483 static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
3487 if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
3488 return emulate_gp(ctxt, 0);
3489 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
3490 *reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
3491 return X86EMUL_CONTINUE;
3494 static int em_mov(struct x86_emulate_ctxt *ctxt)
3496 memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
3497 return X86EMUL_CONTINUE;
3500 #define FFL(x) bit(X86_FEATURE_##x)
3502 static int em_movbe(struct x86_emulate_ctxt *ctxt)
3504 u32 ebx, ecx, edx, eax = 1;
3508 * Check MOVBE is set in the guest-visible CPUID leaf.
3510 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
3511 if (!(ecx & FFL(MOVBE)))
3512 return emulate_ud(ctxt);
3514 switch (ctxt->op_bytes) {
3517 * From MOVBE definition: "...When the operand size is 16 bits,
3518 * the upper word of the destination register remains unchanged
3521 * Both casting ->valptr and ->val to u16 breaks strict aliasing
3522 * rules so we have to do the operation almost per hand.
3524 tmp = (u16)ctxt->src.val;
3525 ctxt->dst.val &= ~0xffffUL;
3526 ctxt->dst.val |= (unsigned long)swab16(tmp);
3529 ctxt->dst.val = swab32((u32)ctxt->src.val);
3532 ctxt->dst.val = swab64(ctxt->src.val);
3537 return X86EMUL_CONTINUE;
3540 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
3542 if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
3543 return emulate_gp(ctxt, 0);
3545 /* Disable writeback. */
3546 ctxt->dst.type = OP_NONE;
3547 return X86EMUL_CONTINUE;
3550 static int em_dr_write(struct x86_emulate_ctxt *ctxt)
3554 if (ctxt->mode == X86EMUL_MODE_PROT64)
3555 val = ctxt->src.val & ~0ULL;
3557 val = ctxt->src.val & ~0U;
3559 /* #UD condition is already handled. */
3560 if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
3561 return emulate_gp(ctxt, 0);
3563 /* Disable writeback. */
3564 ctxt->dst.type = OP_NONE;
3565 return X86EMUL_CONTINUE;
3568 static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
3572 msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
3573 | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
3574 if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data))
3575 return emulate_gp(ctxt, 0);
3577 return X86EMUL_CONTINUE;
3580 static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
3584 if (ctxt->ops->get_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &msr_data))
3585 return emulate_gp(ctxt, 0);
3587 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
3588 *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3589 return X86EMUL_CONTINUE;
3592 static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
3594 if (ctxt->modrm_reg > VCPU_SREG_GS)
3595 return emulate_ud(ctxt);
3597 ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg);
3598 if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
3599 ctxt->dst.bytes = 2;
3600 return X86EMUL_CONTINUE;
3603 static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
3605 u16 sel = ctxt->src.val;
3607 if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3608 return emulate_ud(ctxt);
3610 if (ctxt->modrm_reg == VCPU_SREG_SS)
3611 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3613 /* Disable writeback. */
3614 ctxt->dst.type = OP_NONE;
3615 return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3618 static int em_lldt(struct x86_emulate_ctxt *ctxt)
3620 u16 sel = ctxt->src.val;
3622 /* Disable writeback. */
3623 ctxt->dst.type = OP_NONE;
3624 return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
3627 static int em_ltr(struct x86_emulate_ctxt *ctxt)
3629 u16 sel = ctxt->src.val;
3631 /* Disable writeback. */
3632 ctxt->dst.type = OP_NONE;
3633 return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
3636 static int em_invlpg(struct x86_emulate_ctxt *ctxt)
3641 rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
3642 if (rc == X86EMUL_CONTINUE)
3643 ctxt->ops->invlpg(ctxt, linear);
3644 /* Disable writeback. */
3645 ctxt->dst.type = OP_NONE;
3646 return X86EMUL_CONTINUE;
3649 static int em_clts(struct x86_emulate_ctxt *ctxt)
3653 cr0 = ctxt->ops->get_cr(ctxt, 0);
3655 ctxt->ops->set_cr(ctxt, 0, cr0);
3656 return X86EMUL_CONTINUE;
3659 static int em_hypercall(struct x86_emulate_ctxt *ctxt)
3661 int rc = ctxt->ops->fix_hypercall(ctxt);
3663 if (rc != X86EMUL_CONTINUE)
3666 /* Let the processor re-execute the fixed hypercall */
3667 ctxt->_eip = ctxt->eip;
3668 /* Disable writeback. */
3669 ctxt->dst.type = OP_NONE;
3670 return X86EMUL_CONTINUE;
3673 static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
3674 void (*get)(struct x86_emulate_ctxt *ctxt,
3675 struct desc_ptr *ptr))
3677 struct desc_ptr desc_ptr;
3679 if (ctxt->mode == X86EMUL_MODE_PROT64)
3681 get(ctxt, &desc_ptr);
3682 if (ctxt->op_bytes == 2) {
3684 desc_ptr.address &= 0x00ffffff;
3686 /* Disable writeback. */
3687 ctxt->dst.type = OP_NONE;
3688 return segmented_write(ctxt, ctxt->dst.addr.mem,
3689 &desc_ptr, 2 + ctxt->op_bytes);
3692 static int em_sgdt(struct x86_emulate_ctxt *ctxt)
3694 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
3697 static int em_sidt(struct x86_emulate_ctxt *ctxt)
3699 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
3702 static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3704 struct desc_ptr desc_ptr;
3707 if (ctxt->mode == X86EMUL_MODE_PROT64)
3709 rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3710 &desc_ptr.size, &desc_ptr.address,
3712 if (rc != X86EMUL_CONTINUE)
3714 if (ctxt->mode == X86EMUL_MODE_PROT64 &&
3715 is_noncanonical_address(desc_ptr.address))
3716 return emulate_gp(ctxt, 0);
3718 ctxt->ops->set_gdt(ctxt, &desc_ptr);
3720 ctxt->ops->set_idt(ctxt, &desc_ptr);
3721 /* Disable writeback. */
3722 ctxt->dst.type = OP_NONE;
3723 return X86EMUL_CONTINUE;
3726 static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3728 return em_lgdt_lidt(ctxt, true);
3731 static int em_lidt(struct x86_emulate_ctxt *ctxt)
3733 return em_lgdt_lidt(ctxt, false);
3736 static int em_smsw(struct x86_emulate_ctxt *ctxt)
3738 if (ctxt->dst.type == OP_MEM)
3739 ctxt->dst.bytes = 2;
3740 ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3741 return X86EMUL_CONTINUE;
3744 static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3746 ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3747 | (ctxt->src.val & 0x0f));
3748 ctxt->dst.type = OP_NONE;
3749 return X86EMUL_CONTINUE;
3752 static int em_loop(struct x86_emulate_ctxt *ctxt)
3754 int rc = X86EMUL_CONTINUE;
3756 register_address_increment(ctxt, VCPU_REGS_RCX, -1);
3757 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3758 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3759 rc = jmp_rel(ctxt, ctxt->src.val);
3764 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3766 int rc = X86EMUL_CONTINUE;
3768 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3769 rc = jmp_rel(ctxt, ctxt->src.val);
3774 static int em_in(struct x86_emulate_ctxt *ctxt)
3776 if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
3778 return X86EMUL_IO_NEEDED;
3780 return X86EMUL_CONTINUE;
3783 static int em_out(struct x86_emulate_ctxt *ctxt)
3785 ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
3787 /* Disable writeback. */
3788 ctxt->dst.type = OP_NONE;
3789 return X86EMUL_CONTINUE;
3792 static int em_cli(struct x86_emulate_ctxt *ctxt)
3794 if (emulator_bad_iopl(ctxt))
3795 return emulate_gp(ctxt, 0);
3797 ctxt->eflags &= ~X86_EFLAGS_IF;
3798 return X86EMUL_CONTINUE;
3801 static int em_sti(struct x86_emulate_ctxt *ctxt)
3803 if (emulator_bad_iopl(ctxt))
3804 return emulate_gp(ctxt, 0);
3806 ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3807 ctxt->eflags |= X86_EFLAGS_IF;
3808 return X86EMUL_CONTINUE;
3811 static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3813 u32 eax, ebx, ecx, edx;
3815 eax = reg_read(ctxt, VCPU_REGS_RAX);
3816 ecx = reg_read(ctxt, VCPU_REGS_RCX);
3817 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
3818 *reg_write(ctxt, VCPU_REGS_RAX) = eax;
3819 *reg_write(ctxt, VCPU_REGS_RBX) = ebx;
3820 *reg_write(ctxt, VCPU_REGS_RCX) = ecx;
3821 *reg_write(ctxt, VCPU_REGS_RDX) = edx;
3822 return X86EMUL_CONTINUE;
3825 static int em_sahf(struct x86_emulate_ctxt *ctxt)
3829 flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
3831 flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
3833 ctxt->eflags &= ~0xffUL;
3834 ctxt->eflags |= flags | X86_EFLAGS_FIXED;
3835 return X86EMUL_CONTINUE;
3838 static int em_lahf(struct x86_emulate_ctxt *ctxt)
3840 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
3841 *reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
3842 return X86EMUL_CONTINUE;
3845 static int em_bswap(struct x86_emulate_ctxt *ctxt)
3847 switch (ctxt->op_bytes) {
3848 #ifdef CONFIG_X86_64
3850 asm("bswap %0" : "+r"(ctxt->dst.val));
3854 asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
3857 return X86EMUL_CONTINUE;
3860 static int em_clflush(struct x86_emulate_ctxt *ctxt)
3862 /* emulating clflush regardless of cpuid */
3863 return X86EMUL_CONTINUE;
3866 static int em_movsxd(struct x86_emulate_ctxt *ctxt)
3868 ctxt->dst.val = (s32) ctxt->src.val;
3869 return X86EMUL_CONTINUE;
3872 static int check_fxsr(struct x86_emulate_ctxt *ctxt)
3874 u32 eax = 1, ebx, ecx = 0, edx;
3876 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
3877 if (!(edx & FFL(FXSR)))
3878 return emulate_ud(ctxt);
3880 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
3881 return emulate_nm(ctxt);
3884 * Don't emulate a case that should never be hit, instead of working
3885 * around a lack of fxsave64/fxrstor64 on old compilers.
3887 if (ctxt->mode >= X86EMUL_MODE_PROT64)
3888 return X86EMUL_UNHANDLEABLE;
3890 return X86EMUL_CONTINUE;
3894 * FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
3897 * - like (1), but FIP and FDP (foo) are only 16 bit. At least Intel CPUs
3898 * preserve whole 32 bit values, though, so (1) and (2) are the same wrt.
3900 * 3) 64-bit mode with REX.W prefix
3901 * - like (2), but XMM 8-15 are being saved and restored
3902 * 4) 64-bit mode without REX.W prefix
3903 * - like (3), but FIP and FDP are 64 bit
3905 * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
3906 * desired result. (4) is not emulated.
3908 * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
3909 * and FPU DS) should match.
3911 static int em_fxsave(struct x86_emulate_ctxt *ctxt)
3913 struct fxregs_state fx_state;
3917 rc = check_fxsr(ctxt);
3918 if (rc != X86EMUL_CONTINUE)
3921 ctxt->ops->get_fpu(ctxt);
3923 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
3925 ctxt->ops->put_fpu(ctxt);
3927 if (rc != X86EMUL_CONTINUE)
3930 if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)
3931 size = offsetof(struct fxregs_state, xmm_space[8 * 16/4]);
3933 size = offsetof(struct fxregs_state, xmm_space[0]);
3935 return segmented_write(ctxt, ctxt->memop.addr.mem, &fx_state, size);
3938 static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt,
3939 struct fxregs_state *new)
3941 int rc = X86EMUL_CONTINUE;
3942 struct fxregs_state old;
3944 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(old));
3945 if (rc != X86EMUL_CONTINUE)
3949 * 64 bit host will restore XMM 8-15, which is not correct on non-64
3950 * bit guests. Load the current values in order to preserve 64 bit
3951 * XMMs after fxrstor.
3953 #ifdef CONFIG_X86_64
3954 /* XXX: accessing XMM 8-15 very awkwardly */
3955 memcpy(&new->xmm_space[8 * 16/4], &old.xmm_space[8 * 16/4], 8 * 16);
3959 * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but
3960 * does save and restore MXCSR.
3962 if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))
3963 memcpy(new->xmm_space, old.xmm_space, 8 * 16);
3968 static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
3970 struct fxregs_state fx_state;
3973 rc = check_fxsr(ctxt);
3974 if (rc != X86EMUL_CONTINUE)
3977 rc = segmented_read(ctxt, ctxt->memop.addr.mem, &fx_state, 512);
3978 if (rc != X86EMUL_CONTINUE)
3981 if (fx_state.mxcsr >> 16)
3982 return emulate_gp(ctxt, 0);
3984 ctxt->ops->get_fpu(ctxt);
3986 if (ctxt->mode < X86EMUL_MODE_PROT64)
3987 rc = fxrstor_fixup(ctxt, &fx_state);
3989 if (rc == X86EMUL_CONTINUE)
3990 rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
3992 ctxt->ops->put_fpu(ctxt);
3997 static bool valid_cr(int nr)
4009 static int check_cr_read(struct x86_emulate_ctxt *ctxt)
4011 if (!valid_cr(ctxt->modrm_reg))
4012 return emulate_ud(ctxt);
4014 return X86EMUL_CONTINUE;
4017 static int check_cr_write(struct x86_emulate_ctxt *ctxt)
4019 u64 new_val = ctxt->src.val64;
4020 int cr = ctxt->modrm_reg;
4023 static u64 cr_reserved_bits[] = {
4024 0xffffffff00000000ULL,
4025 0, 0, 0, /* CR3 checked later */
4032 return emulate_ud(ctxt);
4034 if (new_val & cr_reserved_bits[cr])
4035 return emulate_gp(ctxt, 0);
4040 if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) ||
4041 ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD)))
4042 return emulate_gp(ctxt, 0);
4044 cr4 = ctxt->ops->get_cr(ctxt, 4);
4045 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
4047 if ((new_val & X86_CR0_PG) && (efer & EFER_LME) &&
4048 !(cr4 & X86_CR4_PAE))
4049 return emulate_gp(ctxt, 0);
4056 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
4057 if (efer & EFER_LMA)
4058 rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;
4061 return emulate_gp(ctxt, 0);
4066 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
4068 if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE))
4069 return emulate_gp(ctxt, 0);
4075 return X86EMUL_CONTINUE;
4078 static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
4082 ctxt->ops->get_dr(ctxt, 7, &dr7);
4084 /* Check if DR7.Global_Enable is set */
4085 return dr7 & (1 << 13);
4088 static int check_dr_read(struct x86_emulate_ctxt *ctxt)
4090 int dr = ctxt->modrm_reg;
4094 return emulate_ud(ctxt);
4096 cr4 = ctxt->ops->get_cr(ctxt, 4);
4097 if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
4098 return emulate_ud(ctxt);
4100 if (check_dr7_gd(ctxt)) {
4103 ctxt->ops->get_dr(ctxt, 6, &dr6);
4105 dr6 |= DR6_BD | DR6_RTM;
4106 ctxt->ops->set_dr(ctxt, 6, dr6);
4107 return emulate_db(ctxt);
4110 return X86EMUL_CONTINUE;
4113 static int check_dr_write(struct x86_emulate_ctxt *ctxt)
4115 u64 new_val = ctxt->src.val64;
4116 int dr = ctxt->modrm_reg;
4118 if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
4119 return emulate_gp(ctxt, 0);
4121 return check_dr_read(ctxt);
4124 static int check_svme(struct x86_emulate_ctxt *ctxt)
4128 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
4130 if (!(efer & EFER_SVME))
4131 return emulate_ud(ctxt);
4133 return X86EMUL_CONTINUE;
4136 static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
4138 u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
4140 /* Valid physical address? */
4141 if (rax & 0xffff000000000000ULL)
4142 return emulate_gp(ctxt, 0);
4144 return check_svme(ctxt);
4147 static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
4149 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
4151 if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
4152 return emulate_ud(ctxt);
4154 return X86EMUL_CONTINUE;
4157 static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
4159 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
4160 u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
4162 if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
4163 ctxt->ops->check_pmc(ctxt, rcx))
4164 return emulate_gp(ctxt, 0);
4166 return X86EMUL_CONTINUE;
4169 static int check_perm_in(struct x86_emulate_ctxt *ctxt)
4171 ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
4172 if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes))
4173 return emulate_gp(ctxt, 0);
4175 return X86EMUL_CONTINUE;
4178 static int check_perm_out(struct x86_emulate_ctxt *ctxt)
4180 ctxt->src.bytes = min(ctxt->src.bytes, 4u);
4181 if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes))
4182 return emulate_gp(ctxt, 0);
4184 return X86EMUL_CONTINUE;
4187 #define D(_y) { .flags = (_y) }
4188 #define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
4189 #define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
4190 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4191 #define N D(NotImpl)
4192 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
4193 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
4194 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
4195 #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
4196 #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
4197 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
4198 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
4199 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
4200 #define II(_f, _e, _i) \
4201 { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
4202 #define IIP(_f, _e, _i, _p) \
4203 { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
4204 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4205 #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
4207 #define D2bv(_f) D((_f) | ByteOp), D(_f)
4208 #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
4209 #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
4210 #define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
4211 #define I2bvIP(_f, _e, _i, _p) \
4212 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
4214 #define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
4215 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
4216 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
4218 static const struct opcode group7_rm0[] = {
4220 I(SrcNone | Priv | EmulateOnUD, em_hypercall),
4224 static const struct opcode group7_rm1[] = {
4225 DI(SrcNone | Priv, monitor),
4226 DI(SrcNone | Priv, mwait),
4230 static const struct opcode group7_rm3[] = {
4231 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
4232 II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall),
4233 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
4234 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
4235 DIP(SrcNone | Prot | Priv, stgi, check_svme),
4236 DIP(SrcNone | Prot | Priv, clgi, check_svme),
4237 DIP(SrcNone | Prot | Priv, skinit, check_svme),
4238 DIP(SrcNone | Prot | Priv, invlpga, check_svme),
4241 static const struct opcode group7_rm7[] = {
4243 DIP(SrcNone, rdtscp, check_rdtsc),
4247 static const struct opcode group1[] = {
4249 F(Lock | PageTable, em_or),
4252 F(Lock | PageTable, em_and),
4258 static const struct opcode group1A[] = {
4259 I(DstMem | SrcNone | Mov | Stack | IncSP, em_pop), N, N, N, N, N, N, N,
4262 static const struct opcode group2[] = {
4263 F(DstMem | ModRM, em_rol),
4264 F(DstMem | ModRM, em_ror),
4265 F(DstMem | ModRM, em_rcl),
4266 F(DstMem | ModRM, em_rcr),
4267 F(DstMem | ModRM, em_shl),
4268 F(DstMem | ModRM, em_shr),
4269 F(DstMem | ModRM, em_shl),
4270 F(DstMem | ModRM, em_sar),
4273 static const struct opcode group3[] = {
4274 F(DstMem | SrcImm | NoWrite, em_test),
4275 F(DstMem | SrcImm | NoWrite, em_test),
4276 F(DstMem | SrcNone | Lock, em_not),
4277 F(DstMem | SrcNone | Lock, em_neg),
4278 F(DstXacc | Src2Mem, em_mul_ex),
4279 F(DstXacc | Src2Mem, em_imul_ex),
4280 F(DstXacc | Src2Mem, em_div_ex),
4281 F(DstXacc | Src2Mem, em_idiv_ex),
4284 static const struct opcode group4[] = {
4285 F(ByteOp | DstMem | SrcNone | Lock, em_inc),
4286 F(ByteOp | DstMem | SrcNone | Lock, em_dec),
4290 static const struct opcode group5[] = {
4291 F(DstMem | SrcNone | Lock, em_inc),
4292 F(DstMem | SrcNone | Lock, em_dec),
4293 I(SrcMem | NearBranch, em_call_near_abs),
4294 I(SrcMemFAddr | ImplicitOps, em_call_far),
4295 I(SrcMem | NearBranch, em_jmp_abs),
4296 I(SrcMemFAddr | ImplicitOps, em_jmp_far),
4297 I(SrcMem | Stack, em_push), D(Undefined),
4300 static const struct opcode group6[] = {
4301 DI(Prot | DstMem, sldt),
4302 DI(Prot | DstMem, str),
4303 II(Prot | Priv | SrcMem16, em_lldt, lldt),
4304 II(Prot | Priv | SrcMem16, em_ltr, ltr),
4308 static const struct group_dual group7 = { {
4309 II(Mov | DstMem, em_sgdt, sgdt),
4310 II(Mov | DstMem, em_sidt, sidt),
4311 II(SrcMem | Priv, em_lgdt, lgdt),
4312 II(SrcMem | Priv, em_lidt, lidt),
4313 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4314 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4315 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
4319 N, EXT(0, group7_rm3),
4320 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4321 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4325 static const struct opcode group8[] = {
4327 F(DstMem | SrcImmByte | NoWrite, em_bt),
4328 F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
4329 F(DstMem | SrcImmByte | Lock, em_btr),
4330 F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
4333 static const struct group_dual group9 = { {
4334 N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
4336 N, N, N, N, N, N, N, N,
4339 static const struct opcode group11[] = {
4340 I(DstMem | SrcImm | Mov | PageTable, em_mov),
4344 static const struct gprefix pfx_0f_ae_7 = {
4345 I(SrcMem | ByteOp, em_clflush), N, N, N,
4348 static const struct group_dual group15 = { {
4349 I(ModRM | Aligned16, em_fxsave),
4350 I(ModRM | Aligned16, em_fxrstor),
4351 N, N, N, N, N, GP(0, &pfx_0f_ae_7),
4353 N, N, N, N, N, N, N, N,
4356 static const struct gprefix pfx_0f_6f_0f_7f = {
4357 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
4360 static const struct instr_dual instr_dual_0f_2b = {
4364 static const struct gprefix pfx_0f_2b = {
4365 ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
4368 static const struct gprefix pfx_0f_28_0f_29 = {
4369 I(Aligned, em_mov), I(Aligned, em_mov), N, N,
4372 static const struct gprefix pfx_0f_e7 = {
4373 N, I(Sse, em_mov), N, N,
4376 static const struct escape escape_d9 = { {
4377 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
4380 N, N, N, N, N, N, N, N,
4382 N, N, N, N, N, N, N, N,
4384 N, N, N, N, N, N, N, N,
4386 N, N, N, N, N, N, N, N,
4388 N, N, N, N, N, N, N, N,
4390 N, N, N, N, N, N, N, N,
4392 N, N, N, N, N, N, N, N,
4394 N, N, N, N, N, N, N, N,
4397 static const struct escape escape_db = { {
4398 N, N, N, N, N, N, N, N,
4401 N, N, N, N, N, N, N, N,
4403 N, N, N, N, N, N, N, N,
4405 N, N, N, N, N, N, N, N,
4407 N, N, N, N, N, N, N, N,
4409 N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
4411 N, N, N, N, N, N, N, N,
4413 N, N, N, N, N, N, N, N,
4415 N, N, N, N, N, N, N, N,
4418 static const struct escape escape_dd = { {
4419 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
4422 N, N, N, N, N, N, N, N,
4424 N, N, N, N, N, N, N, N,
4426 N, N, N, N, N, N, N, N,
4428 N, N, N, N, N, N, N, N,
4430 N, N, N, N, N, N, N, N,
4432 N, N, N, N, N, N, N, N,
4434 N, N, N, N, N, N, N, N,
4436 N, N, N, N, N, N, N, N,
4439 static const struct instr_dual instr_dual_0f_c3 = {
4440 I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
4443 static const struct mode_dual mode_dual_63 = {
4444 N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
4447 static const struct opcode opcode_table[256] = {
4449 F6ALU(Lock, em_add),
4450 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
4451 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
4453 F6ALU(Lock | PageTable, em_or),
4454 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
4457 F6ALU(Lock, em_adc),
4458 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
4459 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
4461 F6ALU(Lock, em_sbb),
4462 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
4463 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
4465 F6ALU(Lock | PageTable, em_and), N, N,
4467 F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
4469 F6ALU(Lock, em_xor), N, N,
4471 F6ALU(NoWrite, em_cmp), N, N,
4473 X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
4475 X8(I(SrcReg | Stack, em_push)),
4477 X8(I(DstReg | Stack, em_pop)),
4479 I(ImplicitOps | Stack | No64, em_pusha),
4480 I(ImplicitOps | Stack | No64, em_popa),
4481 N, MD(ModRM, &mode_dual_63),
4484 I(SrcImm | Mov | Stack, em_push),
4485 I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
4486 I(SrcImmByte | Mov | Stack, em_push),
4487 I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
4488 I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
4489 I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
4491 X16(D(SrcImmByte | NearBranch)),
4493 G(ByteOp | DstMem | SrcImm, group1),
4494 G(DstMem | SrcImm, group1),
4495 G(ByteOp | DstMem | SrcImm | No64, group1),
4496 G(DstMem | SrcImmByte, group1),
4497 F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
4498 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
4500 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
4501 I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
4502 I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
4503 D(ModRM | SrcMem | NoAccess | DstReg),
4504 I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
4507 DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
4509 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
4510 I(SrcImmFAddr | No64, em_call_far), N,
4511 II(ImplicitOps | Stack, em_pushf, pushf),
4512 II(ImplicitOps | Stack, em_popf, popf),
4513 I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
4515 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
4516 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
4517 I2bv(SrcSI | DstDI | Mov | String, em_mov),
4518 F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r),
4520 F2bv(DstAcc | SrcImm | NoWrite, em_test),
4521 I2bv(SrcAcc | DstDI | Mov | String, em_mov),
4522 I2bv(SrcSI | DstAcc | Mov | String, em_mov),
4523 F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
4525 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
4527 X8(I(DstReg | SrcImm64 | Mov, em_mov)),
4529 G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
4530 I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm),
4531 I(ImplicitOps | NearBranch, em_ret),
4532 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
4533 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
4534 G(ByteOp, group11), G(0, group11),
4536 I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
4537 I(ImplicitOps | SrcImmU16, em_ret_far_imm),
4538 I(ImplicitOps, em_ret_far),
4539 D(ImplicitOps), DI(SrcImmByte, intn),
4540 D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
4542 G(Src2One | ByteOp, group2), G(Src2One, group2),
4543 G(Src2CL | ByteOp, group2), G(Src2CL, group2),
4544 I(DstAcc | SrcImmUByte | No64, em_aam),
4545 I(DstAcc | SrcImmUByte | No64, em_aad),
4546 F(DstAcc | ByteOp | No64, em_salc),
4547 I(DstAcc | SrcXLat | ByteOp, em_mov),
4549 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
4551 X3(I(SrcImmByte | NearBranch, em_loop)),
4552 I(SrcImmByte | NearBranch, em_jcxz),
4553 I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
4554 I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
4556 I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch),
4557 I(SrcImmFAddr | No64, em_jmp_far),
4558 D(SrcImmByte | ImplicitOps | NearBranch),
4559 I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
4560 I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
4562 N, DI(ImplicitOps, icebp), N, N,
4563 DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
4564 G(ByteOp, group3), G(0, group3),
4566 D(ImplicitOps), D(ImplicitOps),
4567 I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
4568 D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
4571 static const struct opcode twobyte_table[256] = {
4573 G(0, group6), GD(0, &group7), N, N,
4574 N, I(ImplicitOps | EmulateOnUD, em_syscall),
4575 II(ImplicitOps | Priv, em_clts, clts), N,
4576 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
4577 N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4579 N, N, N, N, N, N, N, N,
4580 D(ImplicitOps | ModRM | SrcMem | NoAccess),
4581 N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
4583 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
4584 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
4585 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
4587 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
4590 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
4591 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
4592 N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
4595 II(ImplicitOps | Priv, em_wrmsr, wrmsr),
4596 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
4597 II(ImplicitOps | Priv, em_rdmsr, rdmsr),
4598 IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
4599 I(ImplicitOps | EmulateOnUD, em_sysenter),
4600 I(ImplicitOps | Priv | EmulateOnUD, em_sysexit),
4602 N, N, N, N, N, N, N, N,
4604 X16(D(DstReg | SrcMem | ModRM)),
4606 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4611 N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
4616 N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
4618 X16(D(SrcImm | NearBranch)),
4620 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
4622 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
4623 II(ImplicitOps, em_cpuid, cpuid),
4624 F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
4625 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
4626 F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
4628 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
4629 II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
4630 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
4631 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
4632 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
4633 GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
4635 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
4636 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
4637 F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
4638 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
4639 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
4640 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4644 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
4645 I(DstReg | SrcMem | ModRM, em_bsf_c),
4646 I(DstReg | SrcMem | ModRM, em_bsr_c),
4647 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4649 F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
4650 N, ID(0, &instr_dual_0f_c3),
4651 N, N, N, GD(0, &group9),
4653 X8(I(DstReg, em_bswap)),
4655 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4657 N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
4658 N, N, N, N, N, N, N, N,
4660 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
4663 static const struct instr_dual instr_dual_0f_38_f0 = {
4664 I(DstReg | SrcMem | Mov, em_movbe), N
4667 static const struct instr_dual instr_dual_0f_38_f1 = {
4668 I(DstMem | SrcReg | Mov, em_movbe), N
4671 static const struct gprefix three_byte_0f_38_f0 = {
4672 ID(0, &instr_dual_0f_38_f0), N, N, N
4675 static const struct gprefix three_byte_0f_38_f1 = {
4676 ID(0, &instr_dual_0f_38_f1), N, N, N
4680 * Insns below are selected by the prefix which indexed by the third opcode
4683 static const struct opcode opcode_map_0f_38[256] = {
4685 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4687 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4689 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
4690 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
4711 static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
4715 size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4721 static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
4722 unsigned size, bool sign_extension)
4724 int rc = X86EMUL_CONTINUE;
4728 op->addr.mem.ea = ctxt->_eip;
4729 /* NB. Immediates are sign-extended as necessary. */
4730 switch (op->bytes) {
4732 op->val = insn_fetch(s8, ctxt);
4735 op->val = insn_fetch(s16, ctxt);
4738 op->val = insn_fetch(s32, ctxt);
4741 op->val = insn_fetch(s64, ctxt);
4744 if (!sign_extension) {
4745 switch (op->bytes) {
4753 op->val &= 0xffffffff;
4761 static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4764 int rc = X86EMUL_CONTINUE;
4768 decode_register_operand(ctxt, op);
4771 rc = decode_imm(ctxt, op, 1, false);
4774 ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4778 if (ctxt->d & BitOp)
4779 fetch_bit_operand(ctxt);
4780 op->orig_val = op->val;
4783 ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
4787 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4788 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4789 fetch_register_operand(op);
4790 op->orig_val = op->val;
4794 op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
4795 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4796 fetch_register_operand(op);
4797 op->orig_val = op->val;
4800 if (ctxt->d & ByteOp) {
4805 op->bytes = ctxt->op_bytes;
4806 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4807 fetch_register_operand(op);
4808 op->orig_val = op->val;
4812 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4814 register_address(ctxt, VCPU_REGS_RDI);
4815 op->addr.mem.seg = VCPU_SREG_ES;
4822 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4823 fetch_register_operand(op);
4828 op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
4831 rc = decode_imm(ctxt, op, 1, true);
4839 rc = decode_imm(ctxt, op, imm_size(ctxt), true);
4842 rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
4845 ctxt->memop.bytes = 1;
4846 if (ctxt->memop.type == OP_REG) {
4847 ctxt->memop.addr.reg = decode_register(ctxt,
4848 ctxt->modrm_rm, true);
4849 fetch_register_operand(&ctxt->memop);
4853 ctxt->memop.bytes = 2;
4856 ctxt->memop.bytes = 4;
4859 rc = decode_imm(ctxt, op, 2, false);
4862 rc = decode_imm(ctxt, op, imm_size(ctxt), false);
4866 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4868 register_address(ctxt, VCPU_REGS_RSI);
4869 op->addr.mem.seg = ctxt->seg_override;
4875 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4878 reg_read(ctxt, VCPU_REGS_RBX) +
4879 (reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
4880 op->addr.mem.seg = ctxt->seg_override;
4885 op->addr.mem.ea = ctxt->_eip;
4886 op->bytes = ctxt->op_bytes + 2;
4887 insn_fetch_arr(op->valptr, op->bytes, ctxt);
4890 ctxt->memop.bytes = ctxt->op_bytes + 2;
4894 op->val = VCPU_SREG_ES;
4898 op->val = VCPU_SREG_CS;
4902 op->val = VCPU_SREG_SS;
4906 op->val = VCPU_SREG_DS;
4910 op->val = VCPU_SREG_FS;
4914 op->val = VCPU_SREG_GS;
4917 /* Special instructions do their own operand decoding. */
4919 op->type = OP_NONE; /* Disable writeback. */
4927 int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
4929 int rc = X86EMUL_CONTINUE;
4930 int mode = ctxt->mode;
4931 int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
4932 bool op_prefix = false;
4933 bool has_seg_override = false;
4934 struct opcode opcode;
4936 ctxt->memop.type = OP_NONE;
4937 ctxt->memopp = NULL;
4938 ctxt->_eip = ctxt->eip;
4939 ctxt->fetch.ptr = ctxt->fetch.data;
4940 ctxt->fetch.end = ctxt->fetch.data + insn_len;
4941 ctxt->opcode_len = 1;
4943 memcpy(ctxt->fetch.data, insn, insn_len);
4945 rc = __do_insn_fetch_bytes(ctxt, 1);
4946 if (rc != X86EMUL_CONTINUE)
4951 case X86EMUL_MODE_REAL:
4952 case X86EMUL_MODE_VM86:
4953 case X86EMUL_MODE_PROT16:
4954 def_op_bytes = def_ad_bytes = 2;
4956 case X86EMUL_MODE_PROT32:
4957 def_op_bytes = def_ad_bytes = 4;
4959 #ifdef CONFIG_X86_64
4960 case X86EMUL_MODE_PROT64:
4966 return EMULATION_FAILED;
4969 ctxt->op_bytes = def_op_bytes;
4970 ctxt->ad_bytes = def_ad_bytes;
4972 /* Legacy prefixes. */
4974 switch (ctxt->b = insn_fetch(u8, ctxt)) {
4975 case 0x66: /* operand-size override */
4977 /* switch between 2/4 bytes */
4978 ctxt->op_bytes = def_op_bytes ^ 6;
4980 case 0x67: /* address-size override */
4981 if (mode == X86EMUL_MODE_PROT64)
4982 /* switch between 4/8 bytes */
4983 ctxt->ad_bytes = def_ad_bytes ^ 12;
4985 /* switch between 2/4 bytes */
4986 ctxt->ad_bytes = def_ad_bytes ^ 6;
4988 case 0x26: /* ES override */
4989 case 0x2e: /* CS override */
4990 case 0x36: /* SS override */
4991 case 0x3e: /* DS override */
4992 has_seg_override = true;
4993 ctxt->seg_override = (ctxt->b >> 3) & 3;
4995 case 0x64: /* FS override */
4996 case 0x65: /* GS override */
4997 has_seg_override = true;
4998 ctxt->seg_override = ctxt->b & 7;
5000 case 0x40 ... 0x4f: /* REX */
5001 if (mode != X86EMUL_MODE_PROT64)
5003 ctxt->rex_prefix = ctxt->b;
5005 case 0xf0: /* LOCK */
5006 ctxt->lock_prefix = 1;
5008 case 0xf2: /* REPNE/REPNZ */
5009 case 0xf3: /* REP/REPE/REPZ */
5010 ctxt->rep_prefix = ctxt->b;
5016 /* Any legacy prefix after a REX prefix nullifies its effect. */
5018 ctxt->rex_prefix = 0;
5024 if (ctxt->rex_prefix & 8)
5025 ctxt->op_bytes = 8; /* REX.W */
5027 /* Opcode byte(s). */
5028 opcode = opcode_table[ctxt->b];
5029 /* Two-byte opcode? */
5030 if (ctxt->b == 0x0f) {
5031 ctxt->opcode_len = 2;
5032 ctxt->b = insn_fetch(u8, ctxt);
5033 opcode = twobyte_table[ctxt->b];
5035 /* 0F_38 opcode map */
5036 if (ctxt->b == 0x38) {
5037 ctxt->opcode_len = 3;
5038 ctxt->b = insn_fetch(u8, ctxt);
5039 opcode = opcode_map_0f_38[ctxt->b];
5042 ctxt->d = opcode.flags;
5044 if (ctxt->d & ModRM)
5045 ctxt->modrm = insn_fetch(u8, ctxt);
5047 /* vex-prefix instructions are not implemented */
5048 if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
5049 (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
5053 while (ctxt->d & GroupMask) {
5054 switch (ctxt->d & GroupMask) {
5056 goffset = (ctxt->modrm >> 3) & 7;
5057 opcode = opcode.u.group[goffset];
5060 goffset = (ctxt->modrm >> 3) & 7;
5061 if ((ctxt->modrm >> 6) == 3)
5062 opcode = opcode.u.gdual->mod3[goffset];
5064 opcode = opcode.u.gdual->mod012[goffset];
5067 goffset = ctxt->modrm & 7;
5068 opcode = opcode.u.group[goffset];
5071 if (ctxt->rep_prefix && op_prefix)
5072 return EMULATION_FAILED;
5073 simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
5074 switch (simd_prefix) {
5075 case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
5076 case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
5077 case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
5078 case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
5082 if (ctxt->modrm > 0xbf)
5083 opcode = opcode.u.esc->high[ctxt->modrm - 0xc0];
5085 opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
5088 if ((ctxt->modrm >> 6) == 3)
5089 opcode = opcode.u.idual->mod3;
5091 opcode = opcode.u.idual->mod012;
5094 if (ctxt->mode == X86EMUL_MODE_PROT64)
5095 opcode = opcode.u.mdual->mode64;
5097 opcode = opcode.u.mdual->mode32;
5100 return EMULATION_FAILED;
5103 ctxt->d &= ~(u64)GroupMask;
5104 ctxt->d |= opcode.flags;
5109 return EMULATION_FAILED;
5111 ctxt->execute = opcode.u.execute;
5113 if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD)))
5114 return EMULATION_FAILED;
5116 if (unlikely(ctxt->d &
5117 (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
5120 * These are copied unconditionally here, and checked unconditionally
5121 * in x86_emulate_insn.
5123 ctxt->check_perm = opcode.check_perm;
5124 ctxt->intercept = opcode.intercept;
5126 if (ctxt->d & NotImpl)
5127 return EMULATION_FAILED;
5129 if (mode == X86EMUL_MODE_PROT64) {
5130 if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
5132 else if (ctxt->d & NearBranch)
5136 if (ctxt->d & Op3264) {
5137 if (mode == X86EMUL_MODE_PROT64)
5143 if ((ctxt->d & No16) && ctxt->op_bytes == 2)
5147 ctxt->op_bytes = 16;
5148 else if (ctxt->d & Mmx)
5152 /* ModRM and SIB bytes. */
5153 if (ctxt->d & ModRM) {
5154 rc = decode_modrm(ctxt, &ctxt->memop);
5155 if (!has_seg_override) {
5156 has_seg_override = true;
5157 ctxt->seg_override = ctxt->modrm_seg;
5159 } else if (ctxt->d & MemAbs)
5160 rc = decode_abs(ctxt, &ctxt->memop);
5161 if (rc != X86EMUL_CONTINUE)
5164 if (!has_seg_override)
5165 ctxt->seg_override = VCPU_SREG_DS;
5167 ctxt->memop.addr.mem.seg = ctxt->seg_override;
5170 * Decode and fetch the source operand: register, memory
5173 rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
5174 if (rc != X86EMUL_CONTINUE)
5178 * Decode and fetch the second source operand: register, memory
5181 rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
5182 if (rc != X86EMUL_CONTINUE)
5185 /* Decode and fetch the destination operand: register or memory. */
5186 rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
5188 if (ctxt->rip_relative && likely(ctxt->memopp))
5189 ctxt->memopp->addr.mem.ea = address_mask(ctxt,
5190 ctxt->memopp->addr.mem.ea + ctxt->_eip);
5193 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
5196 bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
5198 return ctxt->d & PageTable;
5201 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
5203 /* The second termination condition only applies for REPE
5204 * and REPNE. Test if the repeat string operation prefix is
5205 * REPE/REPZ or REPNE/REPNZ and if it's the case it tests the
5206 * corresponding termination condition according to:
5207 * - if REPE/REPZ and ZF = 0 then done
5208 * - if REPNE/REPNZ and ZF = 1 then done
5210 if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
5211 (ctxt->b == 0xae) || (ctxt->b == 0xaf))
5212 && (((ctxt->rep_prefix == REPE_PREFIX) &&
5213 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
5214 || ((ctxt->rep_prefix == REPNE_PREFIX) &&
5215 ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
5221 static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
5225 ctxt->ops->get_fpu(ctxt);
5226 rc = asm_safe("fwait");
5227 ctxt->ops->put_fpu(ctxt);
5229 if (unlikely(rc != X86EMUL_CONTINUE))
5230 return emulate_exception(ctxt, MF_VECTOR, 0, false);
5232 return X86EMUL_CONTINUE;
5235 static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
5238 if (op->type == OP_MM)
5239 read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
5242 static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
5244 register void *__sp asm(_ASM_SP);
5245 ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
5247 if (!(ctxt->d & ByteOp))
5248 fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
5250 asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
5251 : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
5252 [fastop]"+S"(fop), "+r"(__sp)
5253 : "c"(ctxt->src2.val));
5255 ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
5256 if (!fop) /* exception is returned in fop variable */
5257 return emulate_de(ctxt);
5258 return X86EMUL_CONTINUE;
5261 void init_decode_cache(struct x86_emulate_ctxt *ctxt)
5263 memset(&ctxt->rip_relative, 0,
5264 (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);
5266 ctxt->io_read.pos = 0;
5267 ctxt->io_read.end = 0;
5268 ctxt->mem_read.end = 0;
5271 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
5273 const struct x86_emulate_ops *ops = ctxt->ops;
5274 int rc = X86EMUL_CONTINUE;
5275 int saved_dst_type = ctxt->dst.type;
5277 ctxt->mem_read.pos = 0;
5279 /* LOCK prefix is allowed only with some instructions */
5280 if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
5281 rc = emulate_ud(ctxt);
5285 if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
5286 rc = emulate_ud(ctxt);
5290 if (unlikely(ctxt->d &
5291 (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
5292 if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
5293 (ctxt->d & Undefined)) {
5294 rc = emulate_ud(ctxt);
5298 if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
5299 || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
5300 rc = emulate_ud(ctxt);
5304 if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
5305 rc = emulate_nm(ctxt);
5309 if (ctxt->d & Mmx) {
5310 rc = flush_pending_x87_faults(ctxt);
5311 if (rc != X86EMUL_CONTINUE)
5314 * Now that we know the fpu is exception safe, we can fetch
5317 fetch_possible_mmx_operand(ctxt, &ctxt->src);
5318 fetch_possible_mmx_operand(ctxt, &ctxt->src2);
5319 if (!(ctxt->d & Mov))
5320 fetch_possible_mmx_operand(ctxt, &ctxt->dst);
5323 if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
5324 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5325 X86_ICPT_PRE_EXCEPT);
5326 if (rc != X86EMUL_CONTINUE)
5330 /* Instruction can only be executed in protected mode */
5331 if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
5332 rc = emulate_ud(ctxt);
5336 /* Privileged instruction can be executed only in CPL=0 */
5337 if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
5338 if (ctxt->d & PrivUD)
5339 rc = emulate_ud(ctxt);
5341 rc = emulate_gp(ctxt, 0);
5345 /* Do instruction specific permission checks */
5346 if (ctxt->d & CheckPerm) {
5347 rc = ctxt->check_perm(ctxt);
5348 if (rc != X86EMUL_CONTINUE)
5352 if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
5353 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5354 X86_ICPT_POST_EXCEPT);
5355 if (rc != X86EMUL_CONTINUE)
5359 if (ctxt->rep_prefix && (ctxt->d & String)) {
5360 /* All REP prefixes have the same first termination condition */
5361 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
5362 string_registers_quirk(ctxt);
5363 ctxt->eip = ctxt->_eip;
5364 ctxt->eflags &= ~X86_EFLAGS_RF;
5370 if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
5371 rc = segmented_read(ctxt, ctxt->src.addr.mem,
5372 ctxt->src.valptr, ctxt->src.bytes);
5373 if (rc != X86EMUL_CONTINUE)
5375 ctxt->src.orig_val64 = ctxt->src.val64;
5378 if (ctxt->src2.type == OP_MEM) {
5379 rc = segmented_read(ctxt, ctxt->src2.addr.mem,
5380 &ctxt->src2.val, ctxt->src2.bytes);
5381 if (rc != X86EMUL_CONTINUE)
5385 if ((ctxt->d & DstMask) == ImplicitOps)
5389 if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
5390 /* optimisation - avoid slow emulated read if Mov */
5391 rc = segmented_read(ctxt, ctxt->dst.addr.mem,
5392 &ctxt->dst.val, ctxt->dst.bytes);
5393 if (rc != X86EMUL_CONTINUE) {
5394 if (!(ctxt->d & NoWrite) &&
5395 rc == X86EMUL_PROPAGATE_FAULT &&
5396 ctxt->exception.vector == PF_VECTOR)
5397 ctxt->exception.error_code |= PFERR_WRITE_MASK;
5401 /* Copy full 64-bit value for CMPXCHG8B. */
5402 ctxt->dst.orig_val64 = ctxt->dst.val64;
5406 if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
5407 rc = emulator_check_intercept(ctxt, ctxt->intercept,
5408 X86_ICPT_POST_MEMACCESS);
5409 if (rc != X86EMUL_CONTINUE)
5413 if (ctxt->rep_prefix && (ctxt->d & String))
5414 ctxt->eflags |= X86_EFLAGS_RF;
5416 ctxt->eflags &= ~X86_EFLAGS_RF;
5418 if (ctxt->execute) {
5419 if (ctxt->d & Fastop) {
5420 void (*fop)(struct fastop *) = (void *)ctxt->execute;
5421 rc = fastop(ctxt, fop);
5422 if (rc != X86EMUL_CONTINUE)
5426 rc = ctxt->execute(ctxt);
5427 if (rc != X86EMUL_CONTINUE)
5432 if (ctxt->opcode_len == 2)
5434 else if (ctxt->opcode_len == 3)
5435 goto threebyte_insn;
5438 case 0x70 ... 0x7f: /* jcc (short) */
5439 if (test_cc(ctxt->b, ctxt->eflags))
5440 rc = jmp_rel(ctxt, ctxt->src.val);
5442 case 0x8d: /* lea r16/r32, m */
5443 ctxt->dst.val = ctxt->src.addr.mem.ea;
5445 case 0x90 ... 0x97: /* nop / xchg reg, rax */
5446 if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
5447 ctxt->dst.type = OP_NONE;
5451 case 0x98: /* cbw/cwde/cdqe */
5452 switch (ctxt->op_bytes) {
5453 case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
5454 case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
5455 case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
5458 case 0xcc: /* int3 */
5459 rc = emulate_int(ctxt, 3);
5461 case 0xcd: /* int n */
5462 rc = emulate_int(ctxt, ctxt->src.val);
5464 case 0xce: /* into */
5465 if (ctxt->eflags & X86_EFLAGS_OF)
5466 rc = emulate_int(ctxt, 4);
5468 case 0xe9: /* jmp rel */
5469 case 0xeb: /* jmp rel short */
5470 rc = jmp_rel(ctxt, ctxt->src.val);
5471 ctxt->dst.type = OP_NONE; /* Disable writeback. */
5473 case 0xf4: /* hlt */
5474 ctxt->ops->halt(ctxt);
5476 case 0xf5: /* cmc */
5477 /* complement carry flag from eflags reg */
5478 ctxt->eflags ^= X86_EFLAGS_CF;
5480 case 0xf8: /* clc */
5481 ctxt->eflags &= ~X86_EFLAGS_CF;
5483 case 0xf9: /* stc */
5484 ctxt->eflags |= X86_EFLAGS_CF;
5486 case 0xfc: /* cld */
5487 ctxt->eflags &= ~X86_EFLAGS_DF;
5489 case 0xfd: /* std */
5490 ctxt->eflags |= X86_EFLAGS_DF;
5493 goto cannot_emulate;
5496 if (rc != X86EMUL_CONTINUE)
5500 if (ctxt->d & SrcWrite) {
5501 BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
5502 rc = writeback(ctxt, &ctxt->src);
5503 if (rc != X86EMUL_CONTINUE)
5506 if (!(ctxt->d & NoWrite)) {
5507 rc = writeback(ctxt, &ctxt->dst);
5508 if (rc != X86EMUL_CONTINUE)
5513 * restore dst type in case the decoding will be reused
5514 * (happens for string instruction )
5516 ctxt->dst.type = saved_dst_type;
5518 if ((ctxt->d & SrcMask) == SrcSI)
5519 string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);
5521 if ((ctxt->d & DstMask) == DstDI)
5522 string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
5524 if (ctxt->rep_prefix && (ctxt->d & String)) {
5526 struct read_cache *r = &ctxt->io_read;
5527 if ((ctxt->d & SrcMask) == SrcSI)
5528 count = ctxt->src.count;
5530 count = ctxt->dst.count;
5531 register_address_increment(ctxt, VCPU_REGS_RCX, -count);
5533 if (!string_insn_completed(ctxt)) {
5535 * Re-enter guest when pio read ahead buffer is empty
5536 * or, if it is not used, after each 1024 iteration.
5538 if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
5539 (r->end == 0 || r->end != r->pos)) {
5541 * Reset read cache. Usually happens before
5542 * decode, but since instruction is restarted
5543 * we have to do it here.
5545 ctxt->mem_read.end = 0;
5546 writeback_registers(ctxt);
5547 return EMULATION_RESTART;
5549 goto done; /* skip rip writeback */
5551 ctxt->eflags &= ~X86_EFLAGS_RF;
5554 ctxt->eip = ctxt->_eip;
5557 if (rc == X86EMUL_PROPAGATE_FAULT) {
5558 WARN_ON(ctxt->exception.vector > 0x1f);
5559 ctxt->have_exception = true;
5561 if (rc == X86EMUL_INTERCEPTED)
5562 return EMULATION_INTERCEPTED;
5564 if (rc == X86EMUL_CONTINUE)
5565 writeback_registers(ctxt);
5567 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
5571 case 0x09: /* wbinvd */
5572 (ctxt->ops->wbinvd)(ctxt);
5574 case 0x08: /* invd */
5575 case 0x0d: /* GrpP (prefetch) */
5576 case 0x18: /* Grp16 (prefetch/nop) */
5577 case 0x1f: /* nop */
5579 case 0x20: /* mov cr, reg */
5580 ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
5582 case 0x21: /* mov from dr to reg */
5583 ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
5585 case 0x40 ... 0x4f: /* cmov */
5586 if (test_cc(ctxt->b, ctxt->eflags))
5587 ctxt->dst.val = ctxt->src.val;
5588 else if (ctxt->op_bytes != 4)
5589 ctxt->dst.type = OP_NONE; /* no writeback */
5591 case 0x80 ... 0x8f: /* jnz rel, etc*/
5592 if (test_cc(ctxt->b, ctxt->eflags))
5593 rc = jmp_rel(ctxt, ctxt->src.val);
5595 case 0x90 ... 0x9f: /* setcc r/m8 */
5596 ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
5598 case 0xb6 ... 0xb7: /* movzx */
5599 ctxt->dst.bytes = ctxt->op_bytes;
5600 ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
5601 : (u16) ctxt->src.val;
5603 case 0xbe ... 0xbf: /* movsx */
5604 ctxt->dst.bytes = ctxt->op_bytes;
5605 ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
5606 (s16) ctxt->src.val;
5609 goto cannot_emulate;
5614 if (rc != X86EMUL_CONTINUE)
5620 return EMULATION_FAILED;
5623 void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
5625 invalidate_registers(ctxt);
5628 void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
5630 writeback_registers(ctxt);