// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
 * emulate.c
 *
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
 *
 * Copyright (c) 2005 Keir Fraser
 *
 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
 * privileged instructions:
 *
 * Copyright (C) 2006 Qumranet
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *
 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include <linux/stringify.h>
#include <asm/debugreg.h>
#include <asm/nospec-branch.h>
#include <asm/ibt.h>

#include "x86.h"
#include "tss.h"
#include "mmu.h"
#include "pmu.h"
/*
 * Operand types
 */
#define OpNone             0ull
#define OpImplicit         1ull  /* No generic decode */
#define OpReg              2ull  /* Register */
#define OpMem              3ull  /* Memory */
#define OpAcc              4ull  /* Accumulator: AL/AX/EAX/RAX */
#define OpDI               5ull  /* ES:DI/EDI/RDI */
#define OpMem64            6ull  /* Memory, 64-bit */
#define OpImmUByte         7ull  /* Zero-extended 8-bit immediate */
#define OpDX               8ull  /* DX register */
#define OpCL               9ull  /* CL register (for shifts) */
#define OpImmByte         10ull  /* 8-bit sign extended immediate */
#define OpOne             11ull  /* Implied 1 */
#define OpImm             12ull  /* Sign extended up to 32-bit immediate */
#define OpMem16           13ull  /* Memory operand (16-bit). */
#define OpMem32           14ull  /* Memory operand (32-bit). */
#define OpImmU            15ull  /* Immediate operand, zero extended */
#define OpSI              16ull  /* SI/ESI/RSI */
#define OpImmFAddr        17ull  /* Immediate far address */
#define OpMemFAddr        18ull  /* Far address in memory */
#define OpImmU16          19ull  /* Immediate operand, 16 bits, zero extended */
#define OpES              20ull  /* ES */
#define OpCS              21ull  /* CS */
#define OpSS              22ull  /* SS */
#define OpDS              23ull  /* DS */
#define OpFS              24ull  /* FS */
#define OpGS              25ull  /* GS */
#define OpMem8            26ull  /* 8-bit zero extended memory operand */
#define OpImm64           27ull  /* Sign extended 16/32/64-bit immediate */
#define OpXLat            28ull  /* memory at BX/EBX/RBX + zero-extended AL */
#define OpAccLo           29ull  /* Low part of extended acc (AX/AX/EAX/RAX) */
#define OpAccHi           30ull  /* High part of extended acc (-/DX/EDX/RDX) */

#define OpBits             5  /* Width of operand field */
#define OpMask             ((1ull << OpBits) - 1)
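/*
 * Illustration (not part of the emulator): operand descriptors are packed
 * into the per-opcode u64 flags word and recovered by shift-and-mask. For
 * a table entry built as DstMem | SrcReg (using the Dst/Src shifts defined
 * below), the decoder recovers each field roughly as:
 *
 *	(flags >> DstShift) & OpMask  ->  OpMem
 *	(flags >> SrcShift) & OpMask  ->  OpReg
 */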
/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * not be handled.
 */

/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp      (1<<0)	/* 8-bit operands. */
/* Destination operand type. */
#define DstShift    1
#define ImplicitOps (OpImplicit << DstShift)
#define DstReg      (OpReg << DstShift)
#define DstMem      (OpMem << DstShift)
#define DstAcc      (OpAcc << DstShift)
#define DstDI       (OpDI << DstShift)
#define DstMem64    (OpMem64 << DstShift)
#define DstMem16    (OpMem16 << DstShift)
#define DstImmUByte (OpImmUByte << DstShift)
#define DstDX       (OpDX << DstShift)
#define DstAccLo    (OpAccLo << DstShift)
#define DstMask     (OpMask << DstShift)
/* Source operand type. */
#define SrcShift    6
#define SrcNone     (OpNone << SrcShift)
#define SrcReg      (OpReg << SrcShift)
#define SrcMem      (OpMem << SrcShift)
#define SrcMem16    (OpMem16 << SrcShift)
#define SrcMem32    (OpMem32 << SrcShift)
#define SrcImm      (OpImm << SrcShift)
#define SrcImmByte  (OpImmByte << SrcShift)
#define SrcOne      (OpOne << SrcShift)
#define SrcImmUByte (OpImmUByte << SrcShift)
#define SrcImmU     (OpImmU << SrcShift)
#define SrcSI       (OpSI << SrcShift)
#define SrcXLat     (OpXLat << SrcShift)
#define SrcImmFAddr (OpImmFAddr << SrcShift)
#define SrcMemFAddr (OpMemFAddr << SrcShift)
#define SrcAcc      (OpAcc << SrcShift)
#define SrcImmU16   (OpImmU16 << SrcShift)
#define SrcImm64    (OpImm64 << SrcShift)
#define SrcDX       (OpDX << SrcShift)
#define SrcMem8     (OpMem8 << SrcShift)
#define SrcAccHi    (OpAccHi << SrcShift)
#define SrcMask     (OpMask << SrcShift)
#define BitOp       (1<<11)
#define MemAbs      (1<<12)	/* Memory operand is absolute displacement */
#define String      (1<<13)	/* String instruction (rep capable) */
#define Stack       (1<<14)	/* Stack instruction (push/pop) */
#define GroupMask   (7<<15)	/* Opcode uses one of the group mechanisms */
#define Group       (1<<15)	/* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (2<<15)	/* Alternate decoding of mod == 3 */
#define Prefix      (3<<15)	/* Instruction varies with 66/f2/f3 prefix */
#define RMExt       (4<<15)	/* Opcode extension in ModRM r/m if mod == 3 */
#define Escape      (5<<15)	/* Escape to coprocessor instruction */
#define InstrDual   (6<<15)	/* Alternate instruction decoding of mod == 3 */
#define ModeDual    (7<<15)	/* Different instruction for 32/64 bit */
#define Sse         (1<<18)	/* SSE Vector instruction */
/* Generic ModRM decode. */
#define ModRM       (1<<19)
/* Destination is only written; never read. */
#define Mov         (1<<20)
/* Misc flags */
#define Prot        (1<<21)	/* instruction generates #UD if not in prot-mode */
#define EmulateOnUD (1<<22)	/* Emulate if unsupported by the host */
#define NoAccess    (1<<23)	/* Don't access memory (lea/invlpg/verr etc) */
#define Op3264      (1<<24)	/* Operand is 64b in long mode, 32b otherwise */
#define Undefined   (1<<25)	/* No Such Instruction */
#define Lock        (1<<26)	/* lock prefix is allowed for the instruction */
#define Priv        (1<<27)	/* instruction generates #GP if current CPL != 0 */
#define No64        (1<<28)
#define PageTable   (1 << 29)	/* instruction used to write page table */
#define NotImpl     (1 << 30)	/* instruction is not implemented */
/* Source 2 operand type */
#define Src2Shift   (31)
#define Src2None    (OpNone << Src2Shift)
#define Src2Mem     (OpMem << Src2Shift)
#define Src2CL      (OpCL << Src2Shift)
#define Src2ImmByte (OpImmByte << Src2Shift)
#define Src2One     (OpOne << Src2Shift)
#define Src2Imm     (OpImm << Src2Shift)
#define Src2ES      (OpES << Src2Shift)
#define Src2CS      (OpCS << Src2Shift)
#define Src2SS      (OpSS << Src2Shift)
#define Src2DS      (OpDS << Src2Shift)
#define Src2FS      (OpFS << Src2Shift)
#define Src2GS      (OpGS << Src2Shift)
#define Src2Mask    (OpMask << Src2Shift)
#define Mmx         ((u64)1 << 40)	/* MMX Vector instruction */
#define AlignMask   ((u64)7 << 41)
#define Aligned     ((u64)1 << 41)	/* Explicitly aligned (e.g. MOVDQA) */
#define Unaligned   ((u64)2 << 41)	/* Explicitly unaligned (e.g. MOVDQU) */
#define Avx         ((u64)3 << 41)	/* Advanced Vector Extensions */
#define Aligned16   ((u64)4 << 41)	/* Aligned to 16 byte boundary (e.g. FXSAVE) */
#define Fastop      ((u64)1 << 44)	/* Use opcode::u.fastop */
#define NoWrite     ((u64)1 << 45)	/* No writeback */
#define SrcWrite    ((u64)1 << 46)	/* Write back src operand */
#define NoMod       ((u64)1 << 47)	/* Mod field is ignored */
#define Intercept   ((u64)1 << 48)	/* Has valid intercept field */
#define CheckPerm   ((u64)1 << 49)	/* Has valid check_perm field */
#define PrivUD      ((u64)1 << 51)	/* #UD instead of #GP on CPL > 0 */
#define NearBranch  ((u64)1 << 52)	/* Near branches */
#define No16        ((u64)1 << 53)	/* No 16 bit operand */
#define IncSP       ((u64)1 << 54)	/* SP is incremented before ModRM calc */
#define TwoMemOp    ((u64)1 << 55)	/* Instruction has two memory operands */
#define IsBranch    ((u64)1 << 56)	/* Instruction is considered a branch. */

#define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
#define X2(x...) x, x
#define X3(x...) X2(x), x
#define X4(x...) X2(x), X2(x)
#define X5(x...) X4(x), x
#define X6(x...) X4(x), X2(x)
#define X7(x...) X4(x), X3(x)
#define X8(x...) X4(x), X4(x)
#define X16(x...) X8(x), X8(x)
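/*
 * Illustration (not part of the emulator): the X<n> macros replicate a
 * table entry n times, so the opcode tables further down this file (not
 * shown in this excerpt) can fill a run of identical slots in one line,
 * e.g. X16(D(Undefined)) instead of spelling the entry out sixteen times.
 */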
struct opcode {
	u64 flags;
	u8 intercept;
	u8 pad[7];
	union {
		int (*execute)(struct x86_emulate_ctxt *ctxt);
		const struct opcode *group;
		const struct group_dual *gdual;
		const struct gprefix *gprefix;
		const struct escape *esc;
		const struct instr_dual *idual;
		const struct mode_dual *mdual;
		void (*fastop)(struct fastop *fake);
	} u;
	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
};

struct group_dual {
	struct opcode mod012[8];
	struct opcode mod3[8];
};

struct gprefix {
	struct opcode pfx_no;
	struct opcode pfx_66;
	struct opcode pfx_f2;
	struct opcode pfx_f3;
};

struct escape {
	struct opcode op[8];
	struct opcode high[64];
};

struct instr_dual {
	struct opcode mod012;
	struct opcode mod3;
};

struct mode_dual {
	struct opcode mode32;
	struct opcode mode64;
};

#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a

enum x86_transfer_type {
	X86_TRANSFER_NONE,
	X86_TRANSFER_CALL_JMP,
	X86_TRANSFER_RET,
	X86_TRANSFER_TASK_SWITCH,
};
static void writeback_registers(struct x86_emulate_ctxt *ctxt)
{
	unsigned long dirty = ctxt->regs_dirty;
	unsigned reg;

	for_each_set_bit(reg, &dirty, NR_EMULATOR_GPRS)
		ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
}

static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
{
	ctxt->regs_dirty = 0;
	ctxt->regs_valid = 0;
}

/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 */
#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
		     X86_EFLAGS_PF|X86_EFLAGS_CF)
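/*
 * Sketch (illustrative, not upstream code): after an emulated arithmetic
 * op produces a host flags value, only the tracked bits are merged back
 * into the guest view, roughly as
 *
 *	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
 *
 * so bits outside EFLAGS_MASK (IF, IOPL, VM, ...) are never clobbered.
 */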
#ifdef CONFIG_X86_64
#define ON64(x) x
#else
#define ON64(x)
#endif

/*
 * fastop functions have a special calling convention:
 *
 * dst:    rax        (in/out)
 * src:    rdx        (in/out)
 * src2:   rcx        (in)
 * flags:  rflags     (in/out)
 * ex:     rsi        (in:fastop pointer, out:zero if exception)
 *
 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
 * different operand sizes can be reached by calculation, rather than a jump
 * table (which would be bigger than the code).
 *
 * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR
 * and 1 for the straight line speculation INT3, leaves 7 bytes for the
 * body of the function. Currently none is larger than 4 bytes.
 */
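/*
 * Illustration (not part of the emulator): FASTOP2(add), defined below,
 * emits four entry points at consecutive FASTOP_SIZE-aligned addresses,
 * roughly:
 *
 *	em_add + 0*16:	addb %dl, %al;    ret
 *	em_add + 1*16:	addw %dx, %ax;    ret
 *	em_add + 2*16:	addl %edx, %eax;  ret
 *	em_add + 3*16:	addq %rdx, %rax;  ret
 *
 * so a caller can pick the operand-size variant by arithmetic instead of
 * a jump table, e.g. fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE.
 */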
static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);

#define FASTOP_SIZE	16

#define __FOP_FUNC(name) \
	".align " __stringify(FASTOP_SIZE) " \n\t" \
	".type " name ", @function \n\t" \
	name ":\n\t" \
	ASM_ENDBR \
	IBT_NOSEAL(name)

#define FOP_FUNC(name) \
	__FOP_FUNC(#name)

#define __FOP_RET(name) \
	"11: " ASM_RET \
	".size " name ", .-" name "\n\t"

#define FOP_RET(name) \
	__FOP_RET(#name)

#define __FOP_START(op, align) \
	extern void em_##op(struct fastop *fake); \
	asm(".pushsection .text, \"ax\" \n\t" \
	    ".global em_" #op " \n\t" \
	    ".align " __stringify(align) " \n\t" \
	    "em_" #op ":\n\t"

#define FOP_START(op) __FOP_START(op, FASTOP_SIZE)

#define FOP_END \
	    ".popsection")

#define __FOPNOP(name) \
	__FOP_FUNC(name) \
	__FOP_RET(name)

#define FOPNOP() \
	__FOPNOP(__stringify(__UNIQUE_ID(nop)))
#define FOP1E(op, dst) \
	__FOP_FUNC(#op "_" #dst) \
	"10: " #op " %" #dst " \n\t" \
	__FOP_RET(#op "_" #dst)

#define FOP1EEX(op, dst) \
	FOP1E(op, dst) _ASM_EXTABLE_TYPE_REG(10b, 11b, EX_TYPE_ZERO_REG, %%esi)
#define FASTOP1(op) \
	FOP_START(op) \
	FOP1E(op##b, al) \
	FOP1E(op##w, ax) \
	FOP1E(op##l, eax) \
	ON64(FOP1E(op##q, rax)) \
	FOP_END

/* 1-operand, using src2 (for MUL/DIV r/m) */
#define FASTOP1SRC2(op, name) \
	FOP_START(name) \
	FOP1E(op, cl) \
	FOP1E(op, cx) \
	FOP1E(op, ecx) \
	ON64(FOP1E(op, rcx)) \
	FOP_END

/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
#define FASTOP1SRC2EX(op, name) \
	FOP_START(name) \
	FOP1EEX(op, cl) \
	FOP1EEX(op, cx) \
	FOP1EEX(op, ecx) \
	ON64(FOP1EEX(op, rcx)) \
	FOP_END
#define FOP2E(op, dst, src) \
	__FOP_FUNC(#op "_" #dst "_" #src) \
	#op " %" #src ", %" #dst " \n\t" \
	__FOP_RET(#op "_" #dst "_" #src)
#define FASTOP2(op) \
	FOP_START(op) \
	FOP2E(op##b, al, dl) \
	FOP2E(op##w, ax, dx) \
	FOP2E(op##l, eax, edx) \
	ON64(FOP2E(op##q, rax, rdx)) \
	FOP_END

/* 2 operand, word only */
#define FASTOP2W(op) \
	FOP_START(op) \
	FOPNOP() \
	FOP2E(op##w, ax, dx) \
	FOP2E(op##l, eax, edx) \
	ON64(FOP2E(op##q, rax, rdx)) \
	FOP_END

/* 2 operand, src is CL */
#define FASTOP2CL(op) \
	FOP_START(op) \
	FOP2E(op##b, al, cl) \
	FOP2E(op##w, ax, cl) \
	FOP2E(op##l, eax, cl) \
	ON64(FOP2E(op##q, rax, cl)) \
	FOP_END

/* 2 operand, src and dest are reversed */
#define FASTOP2R(op, name) \
	FOP_START(name) \
	FOP2E(op##b, dl, al) \
	FOP2E(op##w, dx, ax) \
	FOP2E(op##l, edx, eax) \
	ON64(FOP2E(op##q, rdx, rax)) \
	FOP_END
#define FOP3E(op, dst, src, src2) \
	__FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
	#op " %" #src2 ", %" #src ", %" #dst " \n\t"\
	__FOP_RET(#op "_" #dst "_" #src "_" #src2)
/* 3-operand, word-only, src2=cl */
#define FASTOP3WCL(op) \
	FOP_START(op) \
	FOPNOP() \
	FOP3E(op##w, ax, dx, cl) \
	FOP3E(op##l, eax, edx, cl) \
	ON64(FOP3E(op##q, rax, rdx, cl)) \
	FOP_END
/* Special case for SETcc - 1 instruction per cc */
#define FOP_SETCC(op) \
	FOP_FUNC(op) \
	#op " %al \n\t" \
	FOP_RET(op)

FOP_START(setcc)
FOP_SETCC(seto)
FOP_SETCC(setno)
FOP_SETCC(setc)
FOP_SETCC(setnc)
FOP_SETCC(setz)
FOP_SETCC(setnz)
FOP_SETCC(setbe)
FOP_SETCC(setnbe)
FOP_SETCC(sets)
FOP_SETCC(setns)
FOP_SETCC(setp)
FOP_SETCC(setnp)
FOP_SETCC(setl)
FOP_SETCC(setnl)
FOP_SETCC(setle)
FOP_SETCC(setnle)
FOP_END;

FOP_START(salc)
FOP_FUNC(salc)
"pushf; sbb %al, %al; popf \n\t"
FOP_RET(salc)
FOP_END;
/*
 * XXX: inoutclob user must know where the argument is being expanded.
 *      Using asm goto would allow us to remove _fault.
 */
#define asm_safe(insn, inoutclob...) \
({ \
	int _fault = 0; \
 \
	asm volatile("1:" insn "\n" \
		     "2:\n" \
		     _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %[_fault]) \
		     : [_fault] "+r"(_fault) inoutclob ); \
 \
	_fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
})
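/*
 * Usage sketch (illustrative): wrap a possibly-faulting instruction and
 * turn the fault into an emulator return code, e.g.
 *
 *	rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
 *
 * A fault raised by the wrapped instruction sets _fault via the exception
 * table entry, yielding X86EMUL_UNHANDLEABLE instead of a host oops.
 */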
static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
				    enum x86_intercept intercept,
				    enum x86_intercept_stage stage)
{
	struct x86_instruction_info info = {
		.intercept  = intercept,
		.rep_prefix = ctxt->rep_prefix,
		.modrm_mod  = ctxt->modrm_mod,
		.modrm_reg  = ctxt->modrm_reg,
		.modrm_rm   = ctxt->modrm_rm,
		.src_val    = ctxt->src.val64,
		.dst_val    = ctxt->dst.val64,
		.src_bytes  = ctxt->src.bytes,
		.dst_bytes  = ctxt->dst.bytes,
		.ad_bytes   = ctxt->ad_bytes,
		.next_rip   = ctxt->eip,
	};

	return ctxt->ops->intercept(ctxt, &info, stage);
}

static void assign_masked(ulong *dest, ulong src, ulong mask)
{
	*dest = (*dest & ~mask) | (src & mask);
}

static void assign_register(unsigned long *reg, u64 val, int bytes)
{
	/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
	switch (bytes) {
	case 1:
		*(u8 *)reg = (u8)val;
		break;
	case 2:
		*(u16 *)reg = (u16)val;
		break;
	case 4:
		*reg = (u32)val;
		break;	/* 64b: zero-extend */
	case 8:
		*reg = val;
		break;
	}
}
static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
{
	return (1UL << (ctxt->ad_bytes << 3)) - 1;
}

static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
{
	u16 sel;
	struct desc_struct ss;

	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return ~0UL;
	ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
	return ~0U >> ((ss.d ^ 1) * 16);  /* d=0: 0xffff; d=1: 0xffffffff */
}

static int stack_size(struct x86_emulate_ctxt *ctxt)
{
	return (__fls(stack_mask(ctxt)) + 1) >> 3;
}
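/*
 * Worked example: with SS.d = 0 (16-bit stack) stack_mask() returns
 * 0xffff, so stack_size() = (__fls(0xffff) + 1) >> 3 = 16 >> 3 = 2 bytes;
 * a 32-bit stack gives 4, and 64-bit mode (mask ~0UL) gives 8.
 */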
/* Access/update address held in a register, based on addressing mode. */
static inline unsigned long
address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
{
	if (ctxt->ad_bytes == sizeof(unsigned long))
		return reg;
	else
		return reg & ad_mask(ctxt);
}

static inline unsigned long
register_address(struct x86_emulate_ctxt *ctxt, int reg)
{
	return address_mask(ctxt, reg_read(ctxt, reg));
}

static void masked_increment(ulong *reg, ulong mask, int inc)
{
	assign_masked(reg, *reg + inc, mask);
}

static void
register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
{
	ulong *preg = reg_rmw(ctxt, reg);

	assign_register(preg, *preg + inc, ctxt->ad_bytes);
}

static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
{
	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
}
static u32 desc_limit_scaled(struct desc_struct *desc)
{
	u32 limit = get_desc_limit(desc);

	return desc->g ? (limit << 12) | 0xfff : limit;
}
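/*
 * Worked example: a descriptor with g = 1 and limit = 0xfffff scales to
 * (0xfffff << 12) | 0xfff = 0xffffffff, i.e. a 4GiB limit; with g = 0 the
 * raw 20-bit limit is used unchanged (byte granularity).
 */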
static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
{
	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
		return 0;

	return ctxt->ops->get_cached_segment_base(ctxt, seg);
}

static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
			     u32 error, bool valid)
{
	if (KVM_EMULATOR_BUG_ON(vec > 0x1f, ctxt))
		return X86EMUL_UNHANDLEABLE;

	ctxt->exception.vector = vec;
	ctxt->exception.error_code = error;
	ctxt->exception.error_code_valid = valid;
	return X86EMUL_PROPAGATE_FAULT;
}

static int emulate_db(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, DB_VECTOR, 0, false);
}

static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, GP_VECTOR, err, true);
}

static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, SS_VECTOR, err, true);
}

static int emulate_ud(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, UD_VECTOR, 0, false);
}

static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, TS_VECTOR, err, true);
}

static int emulate_de(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, DE_VECTOR, 0, false);
}

static int emulate_nm(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, NM_VECTOR, 0, false);
}
static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
{
	u16 selector;
	struct desc_struct desc;

	ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
	return selector;
}

static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
				 unsigned seg)
{
	u16 dummy;
	u32 base3;
	struct desc_struct desc;

	ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
	ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
}

static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
{
	return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
}

static inline bool emul_is_noncanonical_address(u64 la,
						struct x86_emulate_ctxt *ctxt)
{
	return !__is_canonical_address(la, ctxt_virt_addr_bits(ctxt));
}
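/*
 * Worked example: with 48 virtual-address bits, an address is canonical
 * iff bits 63:47 are all equal. 0x00007fffffffffff and 0xffff800000000000
 * are canonical, while 0x0000800000000000 is not and would make
 * emul_is_noncanonical_address() return true.
 */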
/*
 * x86 defines three classes of vector instructions: explicitly
 * aligned, explicitly unaligned, and the rest, which change behaviour
 * depending on whether they're AVX encoded or not.
 *
 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
 * subject to the same check. FXSAVE and FXRSTOR are checked here too as their
 * 512 bytes of data must be aligned to a 16 byte boundary.
 */
static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
{
	u64 alignment = ctxt->d & AlignMask;

	if (likely(size < 16))
		return 1;

	switch (alignment) {
	case Unaligned:
	case Avx:
		return 1;
	case Aligned16:
		return 16;
	case Aligned:
	default:
		return size;
	}
}
static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
				       struct segmented_address addr,
				       unsigned *max_size, unsigned size,
				       bool write, bool fetch,
				       enum x86emul_mode mode, ulong *linear)
{
	struct desc_struct desc;
	bool usable;
	ulong la;
	u32 lim;
	u16 sel;
	u8  va_bits;

	la = seg_base(ctxt, addr.seg) + addr.ea;
	*max_size = 0;
	switch (mode) {
	case X86EMUL_MODE_PROT64:
		*linear = la;
		va_bits = ctxt_virt_addr_bits(ctxt);
		if (!__is_canonical_address(la, va_bits))
			goto bad;

		*max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
		if (size > *max_size)
			goto bad;
		break;
	default:
		*linear = la = (u32)la;
		usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
						addr.seg);
		if (!usable)
			goto bad;
		/* code segment in protected mode or read-only data segment */
		if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
		     || !(desc.type & 2)) && write)
			goto bad;
		/* unreadable code segment */
		if (!fetch && (desc.type & 8) && !(desc.type & 2))
			goto bad;
		lim = desc_limit_scaled(&desc);
		if (!(desc.type & 8) && (desc.type & 4)) {
			/* expand-down segment */
			if (addr.ea <= lim)
				goto bad;
			lim = desc.d ? 0xffffffff : 0xffff;
		}
		if (addr.ea > lim)
			goto bad;
		if (lim == 0xffffffff)
			*max_size = ~0u;
		else {
			*max_size = (u64)lim + 1 - addr.ea;
			if (size > *max_size)
				goto bad;
		}
		break;
	}
	if (la & (insn_alignment(ctxt, size) - 1))
		return emulate_gp(ctxt, 0);
	return X86EMUL_CONTINUE;
bad:
	if (addr.seg == VCPU_SREG_SS)
		return emulate_ss(ctxt, 0);
	else
		return emulate_gp(ctxt, 0);
}
static int linearize(struct x86_emulate_ctxt *ctxt,
		     struct segmented_address addr,
		     unsigned size, bool write,
		     ulong *linear)
{
	unsigned max_size;
	return __linearize(ctxt, addr, &max_size, size, write, false,
			   ctxt->mode, linear);
}

static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
{
	ulong linear;
	int rc;
	unsigned max_size;
	struct segmented_address addr = { .seg = VCPU_SREG_CS,
					  .ea = dst };

	if (ctxt->op_bytes != sizeof(unsigned long))
		addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
	rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear);
	if (rc == X86EMUL_CONTINUE)
		ctxt->_eip = addr.ea;
	return rc;
}
static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt)
{
	u64 efer;
	struct desc_struct cs;
	u16 selector;
	u32 base3;

	ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);

	if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) {
		/* Real mode. cpu must not have long mode active */
		if (efer & EFER_LMA)
			return X86EMUL_UNHANDLEABLE;
		ctxt->mode = X86EMUL_MODE_REAL;
		return X86EMUL_CONTINUE;
	}

	if (ctxt->eflags & X86_EFLAGS_VM) {
		/* Protected/VM86 mode. cpu must not have long mode active */
		if (efer & EFER_LMA)
			return X86EMUL_UNHANDLEABLE;
		ctxt->mode = X86EMUL_MODE_VM86;
		return X86EMUL_CONTINUE;
	}

	if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS))
		return X86EMUL_UNHANDLEABLE;

	if (efer & EFER_LMA) {
		if (cs.l) {
			/* Proper long mode */
			ctxt->mode = X86EMUL_MODE_PROT64;
		} else if (cs.d) {
			/* 32 bit compatibility mode */
			ctxt->mode = X86EMUL_MODE_PROT32;
		} else {
			/* 16 bit compatibility mode */
			ctxt->mode = X86EMUL_MODE_PROT16;
		}
	} else {
		/* Legacy 32 bit / 16 bit mode */
		ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
	}

	return X86EMUL_CONTINUE;
}
static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
{
	return assign_eip(ctxt, dst);
}

static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst)
{
	int rc = emulator_recalc_and_set_mode(ctxt);

	if (rc != X86EMUL_CONTINUE)
		return rc;

	return assign_eip(ctxt, dst);
}

static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
{
	return assign_eip_near(ctxt, ctxt->_eip + rel);
}
static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
			      void *data, unsigned size)
{
	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true);
}

static int linear_write_system(struct x86_emulate_ctxt *ctxt,
			       ulong linear, void *data,
			       unsigned int size)
{
	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true);
}

static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
			      struct segmented_address addr,
			      void *data,
			      unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, false, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false);
}

static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
			       struct segmented_address addr,
			       void *data,
			       unsigned int size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, true, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false);
}
/*
 * Prefetch the remaining bytes of the instruction without crossing page
 * boundary if they are not in fetch_cache yet.
 */
static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
{
	int rc;
	unsigned size, max_size;
	unsigned long linear;
	int cur_size = ctxt->fetch.end - ctxt->fetch.data;
	struct segmented_address addr = { .seg = VCPU_SREG_CS,
					  .ea = ctxt->eip + cur_size };

	/*
	 * We do not know exactly how many bytes will be needed, and
	 * __linearize is expensive, so fetch as much as possible. We
	 * just have to avoid going beyond the 15 byte limit, the end
	 * of the segment, or the end of the page.
	 *
	 * __linearize is called with size 0 so that it does not do any
	 * boundary check itself. Instead, we use max_size to check
	 * against op_size.
	 */
	rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
			 &linear);
	if (unlikely(rc != X86EMUL_CONTINUE))
		return rc;

	size = min_t(unsigned, 15UL ^ cur_size, max_size);
	size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));

	/*
	 * One instruction can only straddle two pages,
	 * and one has been loaded at the beginning of
	 * x86_decode_insn. So, if not enough bytes
	 * still, we must have hit the 15-byte boundary.
	 */
	if (unlikely(size < op_size))
		return emulate_gp(ctxt, 0);

	rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
			      size, &ctxt->exception);
	if (unlikely(rc != X86EMUL_CONTINUE))
		return rc;
	ctxt->fetch.end += size;
	return X86EMUL_CONTINUE;
}

static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
					       unsigned size)
{
	unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;

	if (unlikely(done_size < size))
		return __do_insn_fetch_bytes(ctxt, size - done_size);
	else
		return X86EMUL_CONTINUE;
}
/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _ctxt) \
({	_type _x; \
 \
	rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \
	if (rc != X86EMUL_CONTINUE) \
		goto done; \
	ctxt->_eip += sizeof(_type); \
	memcpy(&_x, ctxt->fetch.ptr, sizeof(_type)); \
	ctxt->fetch.ptr += sizeof(_type); \
	_x; \
})

#define insn_fetch_arr(_arr, _size, _ctxt) \
({ \
	rc = do_insn_fetch_bytes(_ctxt, _size); \
	if (rc != X86EMUL_CONTINUE) \
		goto done; \
	ctxt->_eip += (_size); \
	memcpy(_arr, ctxt->fetch.ptr, _size); \
	ctxt->fetch.ptr += (_size); \
})
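/*
 * Usage sketch (illustrative): the decoder pulls opcode bytes and
 * immediates through these macros, e.g.
 *
 *	ctxt->b = insn_fetch(u8, ctxt);
 *
 * Note that both macros expect a local 'rc' and a 'done' label in the
 * calling function, which they use on fetch failure.
 */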
/*
 * Given the 'reg' portion of a ModRM byte, and a register block, return a
 * pointer into the block that addresses the relevant register.
 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
 */
static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
			     int byteop)
{
	void *p;
	int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;

	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
		p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
	else
		p = reg_rmw(ctxt, modrm_reg);
	return p;
}

static int read_descriptor(struct x86_emulate_ctxt *ctxt,
			   struct segmented_address addr,
			   u16 *size, unsigned long *address, int op_bytes)
{
	int rc;

	if (op_bytes == 2)
		op_bytes = 3;
	*address = 0;
	rc = segmented_read_std(ctxt, addr, size, 2);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	addr.ea += 2;
	rc = segmented_read_std(ctxt, addr, address, op_bytes);
	return rc;
}
FASTOP2(add);
FASTOP2(or);
FASTOP2(adc);
FASTOP2(sbb);
FASTOP2(and);
FASTOP2(sub);
FASTOP2(xor);
FASTOP2(cmp);
FASTOP2(test);

FASTOP1SRC2(mul, mul_ex);
FASTOP1SRC2(imul, imul_ex);
FASTOP1SRC2EX(div, div_ex);
FASTOP1SRC2EX(idiv, idiv_ex);

FASTOP3WCL(shld);
FASTOP3WCL(shrd);

FASTOP2W(imul);

FASTOP1(not);
FASTOP1(neg);
FASTOP1(inc);
FASTOP1(dec);

FASTOP2CL(rol);
FASTOP2CL(ror);
FASTOP2CL(rcl);
FASTOP2CL(rcr);
FASTOP2CL(shl);
FASTOP2CL(shr);
FASTOP2CL(sar);

FASTOP2W(bsf);
FASTOP2W(bsr);
FASTOP2W(bt);
FASTOP2W(bts);
FASTOP2W(btr);
FASTOP2W(btc);

FASTOP2(xadd);

FASTOP2R(cmp, cmp_r);
static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
{
	/* If src is zero, do not writeback, but update flags */
	if (ctxt->src.val == 0)
		ctxt->dst.type = OP_NONE;
	return fastop(ctxt, em_bsf);
}

static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
{
	/* If src is zero, do not writeback, but update flags */
	if (ctxt->src.val == 0)
		ctxt->dst.type = OP_NONE;
	return fastop(ctxt, em_bsr);
}

static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
{
	u8 rc;
	void (*fop)(void) = (void *)em_setcc + FASTOP_SIZE * (condition & 0xf);

	flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
	asm("push %[flags]; popf; " CALL_NOSPEC
	    : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
	return rc;
}
static void fetch_register_operand(struct operand *op)
{
	switch (op->bytes) {
	case 1:
		op->val = *(u8 *)op->addr.reg;
		break;
	case 2:
		op->val = *(u16 *)op->addr.reg;
		break;
	case 4:
		op->val = *(u32 *)op->addr.reg;
		break;
	case 8:
		op->val = *(u64 *)op->addr.reg;
		break;
	}
}

static int em_fninit(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	kvm_fpu_get();
	asm volatile("fninit");
	kvm_fpu_put();
	return X86EMUL_CONTINUE;
}

static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
{
	u16 fcw;

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	kvm_fpu_get();
	asm volatile("fnstcw %0": "+m"(fcw));
	kvm_fpu_put();

	ctxt->dst.val = fcw;

	return X86EMUL_CONTINUE;
}

static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
{
	u16 fsw;

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	kvm_fpu_get();
	asm volatile("fnstsw %0": "+m"(fsw));
	kvm_fpu_put();

	ctxt->dst.val = fsw;

	return X86EMUL_CONTINUE;
}
static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
				    struct operand *op)
{
	unsigned int reg;

	if (ctxt->d & ModRM)
		reg = ctxt->modrm_reg;
	else
		reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);

	if (ctxt->d & Sse) {
		op->type = OP_XMM;
		op->bytes = 16;
		op->addr.xmm = reg;
		kvm_read_sse_reg(reg, &op->vec_val);
		return;
	}
	if (ctxt->d & Mmx) {
		reg &= 7;
		op->type = OP_MM;
		op->bytes = 8;
		op->addr.mm = reg;
		return;
	}

	op->type = OP_REG;
	op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
	op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);

	fetch_register_operand(op);
	op->orig_val = op->val;
}

static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
{
	if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
		ctxt->modrm_seg = VCPU_SREG_SS;
}
static int decode_modrm(struct x86_emulate_ctxt *ctxt,
			struct operand *op)
{
	u8 sib;
	int index_reg, base_reg, scale;
	int rc = X86EMUL_CONTINUE;
	ulong modrm_ea = 0;

	ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
	index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
	base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */

	ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
	ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
	ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
	ctxt->modrm_seg = VCPU_SREG_DS;

	if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
		op->type = OP_REG;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
					       ctxt->d & ByteOp);
		if (ctxt->d & Sse) {
			op->type = OP_XMM;
			op->bytes = 16;
			op->addr.xmm = ctxt->modrm_rm;
			kvm_read_sse_reg(ctxt->modrm_rm, &op->vec_val);
			return rc;
		}
		if (ctxt->d & Mmx) {
			op->type = OP_MM;
			op->bytes = 8;
			op->addr.mm = ctxt->modrm_rm & 7;
			return rc;
		}
		fetch_register_operand(op);
		return rc;
	}

	op->type = OP_MEM;

	if (ctxt->ad_bytes == 2) {
		unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
		unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
		unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
		unsigned di = reg_read(ctxt, VCPU_REGS_RDI);

		/* 16-bit ModR/M decode. */
		switch (ctxt->modrm_mod) {
		case 0:
			if (ctxt->modrm_rm == 6)
				modrm_ea += insn_fetch(u16, ctxt);
			break;
		case 1:
			modrm_ea += insn_fetch(s8, ctxt);
			break;
		case 2:
			modrm_ea += insn_fetch(u16, ctxt);
			break;
		}
		switch (ctxt->modrm_rm) {
		case 0:
			modrm_ea += bx + si;
			break;
		case 1:
			modrm_ea += bx + di;
			break;
		case 2:
			modrm_ea += bp + si;
			break;
		case 3:
			modrm_ea += bp + di;
			break;
		case 4:
			modrm_ea += si;
			break;
		case 5:
			modrm_ea += di;
			break;
		case 6:
			if (ctxt->modrm_mod != 0)
				modrm_ea += bp;
			break;
		case 7:
			modrm_ea += bx;
			break;
		}
		if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
		    (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
			ctxt->modrm_seg = VCPU_SREG_SS;
		modrm_ea = (u16)modrm_ea;
	} else {
		/* 32/64-bit ModR/M decode. */
		if ((ctxt->modrm_rm & 7) == 4) {
			sib = insn_fetch(u8, ctxt);
			index_reg |= (sib >> 3) & 7;
			base_reg |= sib & 7;
			scale = sib >> 6;

			if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
				modrm_ea += insn_fetch(s32, ctxt);
			else {
				modrm_ea += reg_read(ctxt, base_reg);
				adjust_modrm_seg(ctxt, base_reg);
				/* Increment ESP on POP [ESP] */
				if ((ctxt->d & IncSP) &&
				    base_reg == VCPU_REGS_RSP)
					modrm_ea += ctxt->op_bytes;
			}
			if (index_reg != 4)
				modrm_ea += reg_read(ctxt, index_reg) << scale;
		} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
			modrm_ea += insn_fetch(s32, ctxt);
			if (ctxt->mode == X86EMUL_MODE_PROT64)
				ctxt->rip_relative = 1;
		} else {
			base_reg = ctxt->modrm_rm;
			modrm_ea += reg_read(ctxt, base_reg);
			adjust_modrm_seg(ctxt, base_reg);
		}
		switch (ctxt->modrm_mod) {
		case 1:
			modrm_ea += insn_fetch(s8, ctxt);
			break;
		case 2:
			modrm_ea += insn_fetch(s32, ctxt);
			break;
		}
	}
	op->addr.mem.ea = modrm_ea;
	if (ctxt->ad_bytes != 8)
		ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;

done:
	return rc;
}
static int decode_abs(struct x86_emulate_ctxt *ctxt,
		      struct operand *op)
{
	int rc = X86EMUL_CONTINUE;

	op->type = OP_MEM;
	switch (ctxt->ad_bytes) {
	case 2:
		op->addr.mem.ea = insn_fetch(u16, ctxt);
		break;
	case 4:
		op->addr.mem.ea = insn_fetch(u32, ctxt);
		break;
	case 8:
		op->addr.mem.ea = insn_fetch(u64, ctxt);
		break;
	}
done:
	return rc;
}

static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
{
	long sv = 0, mask;

	if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
		mask = ~((long)ctxt->dst.bytes * 8 - 1);

		if (ctxt->src.bytes == 2)
			sv = (s16)ctxt->src.val & (s16)mask;
		else if (ctxt->src.bytes == 4)
			sv = (s32)ctxt->src.val & (s32)mask;
		else
			sv = (s64)ctxt->src.val & (s64)mask;

		ctxt->dst.addr.mem.ea = address_mask(ctxt,
					ctxt->dst.addr.mem.ea + (sv >> 3));
	}

	/* only subword offset */
	ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
}
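/*
 * Worked example: for BT mem, reg with a 16-bit destination and
 * src.val = 35, mask = ~15 gives sv = 32, so the effective address is
 * advanced by sv >> 3 = 4 bytes and src.val is reduced to 35 & 15 = 3,
 * the bit offset within the addressed word.
 */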
static int read_emulated(struct x86_emulate_ctxt *ctxt,
			 unsigned long addr, void *dest, unsigned size)
{
	int rc;
	struct read_cache *mc = &ctxt->mem_read;

	if (mc->pos < mc->end)
		goto read_cached;

	if (KVM_EMULATOR_BUG_ON((mc->end + size) >= sizeof(mc->data), ctxt))
		return X86EMUL_UNHANDLEABLE;

	rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
				      &ctxt->exception);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	mc->end += size;

read_cached:
	memcpy(dest, mc->data + mc->pos, size);
	mc->pos += size;
	return X86EMUL_CONTINUE;
}

static int segmented_read(struct x86_emulate_ctxt *ctxt,
			  struct segmented_address addr,
			  void *data,
			  unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, false, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return read_emulated(ctxt, linear, data, size);
}

static int segmented_write(struct x86_emulate_ctxt *ctxt,
			   struct segmented_address addr,
			   const void *data,
			   unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, true, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->write_emulated(ctxt, linear, data, size,
					 &ctxt->exception);
}

static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
			     struct segmented_address addr,
			     const void *orig_data, const void *data,
			     unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, true, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
					   size, &ctxt->exception);
}
static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
			   unsigned int size, unsigned short port,
			   void *dest)
{
	struct read_cache *rc = &ctxt->io_read;

	if (rc->pos == rc->end) { /* refill pio read ahead */
		unsigned int in_page, n;
		unsigned int count = ctxt->rep_prefix ?
			address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
		in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
			offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
			PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
		n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
		if (n == 0)
			n = 1;
		rc->pos = rc->end = 0;
		if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
			return 0;
		rc->end = n * size;
	}

	if (ctxt->rep_prefix && (ctxt->d & String) &&
	    !(ctxt->eflags & X86_EFLAGS_DF)) {
		ctxt->dst.data = rc->data + rc->pos;
		ctxt->dst.type = OP_MEM_STR;
		ctxt->dst.count = (rc->end - rc->pos) / size;
		rc->pos = rc->end;
	} else {
		memcpy(dest, rc->data + rc->pos, size);
		rc->pos += size;
	}

	return 1;
}
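/*
 * Worked example: for REP INSB (size = 1) with RCX = 1000 and RDI
 * pointing 100 bytes before a page boundary, in_page = 100, so
 * n = min3(100, sizeof(rc->data), 1000) bytes are read from the port in
 * one burst and later iterations are served out of the read-ahead cache.
 */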
static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
				     u16 index, struct desc_struct *desc)
{
	struct desc_ptr dt;
	ulong addr;

	ctxt->ops->get_idt(ctxt, &dt);

	if (dt.size < index * 8 + 7)
		return emulate_gp(ctxt, index << 3 | 0x2);

	addr = dt.address + index * 8;
	return linear_read_system(ctxt, addr, desc, sizeof(*desc));
}

static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
				     u16 selector, struct desc_ptr *dt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	u32 base3 = 0;

	if (selector & 1 << 2) {
		struct desc_struct desc;
		u16 sel;

		memset(dt, 0, sizeof(*dt));
		if (!ops->get_segment(ctxt, &sel, &desc, &base3,
				      VCPU_SREG_LDTR))
			return;

		dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
		dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
	} else
		ops->get_gdt(ctxt, dt);
}

static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
			      u16 selector, ulong *desc_addr_p)
{
	struct desc_ptr dt;
	u16 index = selector >> 3;
	ulong addr;

	get_descriptor_table_ptr(ctxt, selector, &dt);

	if (dt.size < index * 8 + 7)
		return emulate_gp(ctxt, selector & 0xfffc);

	addr = dt.address + index * 8;

#ifdef CONFIG_X86_64
	if (addr >> 32 != 0) {
		u64 efer;

		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
		if (!(efer & EFER_LMA))
			addr &= (u32)-1;
	}
#endif

	*desc_addr_p = addr;
	return X86EMUL_CONTINUE;
}
/* allowed just for 8 bytes segments */
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   u16 selector, struct desc_struct *desc,
				   ulong *desc_addr_p)
{
	int rc;

	rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc));
}

/* allowed just for 8 bytes segments */
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				    u16 selector, struct desc_struct *desc)
{
	int rc;
	ulong addr;

	rc = get_descriptor_ptr(ctxt, selector, &addr);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	return linear_write_system(ctxt, addr, desc, sizeof(*desc));
}
static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				     u16 selector, int seg, u8 cpl,
				     enum x86_transfer_type transfer,
				     struct desc_struct *desc)
{
	struct desc_struct seg_desc, old_desc;
	u8 dpl, rpl;
	unsigned err_vec = GP_VECTOR;
	u32 err_code = 0;
	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
	ulong desc_addr;
	int ret;
	u16 dummy;
	u32 base3 = 0;

	memset(&seg_desc, 0, sizeof(seg_desc));

	if (ctxt->mode == X86EMUL_MODE_REAL) {
		/* set real mode segment descriptor (keep limit etc. for
		 * unreal mode) */
		ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
		set_desc_base(&seg_desc, selector << 4);
		goto load;
	} else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
		/* VM86 needs a clean new segment descriptor */
		set_desc_base(&seg_desc, selector << 4);
		set_desc_limit(&seg_desc, 0xffff);
		seg_desc.type = 3;
		seg_desc.p = 1;
		seg_desc.s = 1;
		seg_desc.dpl = 3;
		goto load;
	}

	rpl = selector & 3;

	/* TR should be in GDT only */
	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
		goto exception;

	/* NULL selector is not valid for TR, CS and (except for long mode) SS */
	if (null_selector) {
		if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
			goto exception;

		if (seg == VCPU_SREG_SS) {
			if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
				goto exception;

			/*
			 * ctxt->ops->set_segment expects the CPL to be in
			 * SS.DPL, so fake an expand-up 32-bit data segment.
			 */
			seg_desc.type = 3;
			seg_desc.p = 1;
			seg_desc.s = 1;
			seg_desc.dpl = cpl;
			seg_desc.d = 1;
			seg_desc.g = 1;
		}

		/* Skip all following checks */
		goto load;
	}
	ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	err_code = selector & 0xfffc;
	err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
							   GP_VECTOR;

	/* can't load system descriptor into segment selector */
	if (seg <= VCPU_SREG_GS && !seg_desc.s) {
		if (transfer == X86_TRANSFER_CALL_JMP)
			return X86EMUL_UNHANDLEABLE;
		goto exception;
	}

	dpl = seg_desc.dpl;

	switch (seg) {
	case VCPU_SREG_SS:
		/*
		 * segment is not a writable data segment or segment
		 * selector's RPL != CPL or DPL != CPL
		 */
		if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
			goto exception;
		break;
	case VCPU_SREG_CS:
		/*
		 * KVM uses "none" when loading CS as part of emulating Real
		 * Mode exceptions and IRET (handled above). In all other
		 * cases, loading CS without a control transfer is a KVM bug.
		 */
		if (WARN_ON_ONCE(transfer == X86_TRANSFER_NONE))
			goto exception;

		if (!(seg_desc.type & 8))
			goto exception;

		if (transfer == X86_TRANSFER_RET) {
			/* RET can never return to an inner privilege level. */
			if (rpl < cpl)
				goto exception;
			/* Outer-privilege level return is not implemented */
			if (rpl > cpl)
				return X86EMUL_UNHANDLEABLE;
		}
		if (transfer == X86_TRANSFER_RET || transfer == X86_TRANSFER_TASK_SWITCH) {
			if (seg_desc.type & 4) {
				/* conforming */
				if (dpl > rpl)
					goto exception;
			} else {
				/* nonconforming */
				if (rpl != dpl)
					goto exception;
			}
		} else { /* X86_TRANSFER_CALL_JMP */
			if (seg_desc.type & 4) {
				/* conforming */
				if (dpl > cpl)
					goto exception;
			} else {
				/* nonconforming */
				if (rpl > cpl || dpl != cpl)
					goto exception;
			}
		}
		/* in long-mode d/b must be clear if l is set */
		if (seg_desc.d && seg_desc.l) {
			u64 efer = 0;

			ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
			if (efer & EFER_LMA)
				goto exception;
		}

		/* CS(RPL) <- CPL */
		selector = (selector & 0xfffc) | cpl;
		break;
	case VCPU_SREG_TR:
		if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
			goto exception;
		break;
	case VCPU_SREG_LDTR:
		if (seg_desc.s || seg_desc.type != 2)
			goto exception;
		break;
	default: /* DS, ES, FS, or GS */
		/*
		 * segment is not a data or readable code segment or
		 * ((segment is a data or nonconforming code segment)
		 * and ((RPL > DPL) or (CPL > DPL)))
		 */
		if ((seg_desc.type & 0xa) == 0x8 ||
		    (((seg_desc.type & 0xc) != 0xc) &&
		     (rpl > dpl || cpl > dpl)))
			goto exception;
		break;
	}

	if (!seg_desc.p) {
		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
		goto exception;
	}

	if (seg_desc.s) {
		/* mark segment as accessed */
		if (!(seg_desc.type & 1)) {
			seg_desc.type |= 1;
			ret = write_segment_descriptor(ctxt, selector,
						       &seg_desc);
			if (ret != X86EMUL_CONTINUE)
				return ret;
		}
	} else if (ctxt->mode == X86EMUL_MODE_PROT64) {
		ret = linear_read_system(ctxt, desc_addr+8, &base3, sizeof(base3));
		if (ret != X86EMUL_CONTINUE)
			return ret;
		if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
						 ((u64)base3 << 32), ctxt))
			return emulate_gp(ctxt, err_code);
	}

	if (seg == VCPU_SREG_TR) {
		old_desc = seg_desc;
		seg_desc.type |= 2; /* busy */
		ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
						  sizeof(seg_desc), &ctxt->exception);
		if (ret != X86EMUL_CONTINUE)
			return ret;
	}
load:
	ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
	if (desc)
		*desc = seg_desc;
	return X86EMUL_CONTINUE;
exception:
	return emulate_exception(ctxt, err_vec, err_code, true);
}
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   u16 selector, int seg)
{
	u8 cpl = ctxt->ops->cpl(ctxt);

	/*
	 * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
	 * they can load it at CPL<3 (Intel's manual says only LSS can,
	 * but it's wrong).
	 *
	 * However, the Intel manual says that putting IST=1/DPL=3 in
	 * an interrupt gate will result in SS=3 (the AMD manual instead
	 * says it doesn't), so allow SS=3 in __load_segment_descriptor
	 * and only forbid it here.
	 */
	if (seg == VCPU_SREG_SS && selector == 3 &&
	    ctxt->mode == X86EMUL_MODE_PROT64)
		return emulate_exception(ctxt, GP_VECTOR, 0, true);

	return __load_segment_descriptor(ctxt, selector, seg, cpl,
					 X86_TRANSFER_NONE, NULL);
}

static void write_register_operand(struct operand *op)
{
	return assign_register(op->addr.reg, op->val, op->bytes);
}
static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
{
	switch (op->type) {
	case OP_REG:
		write_register_operand(op);
		break;
	case OP_MEM:
		if (ctxt->lock_prefix)
			return segmented_cmpxchg(ctxt,
						 op->addr.mem,
						 &op->orig_val,
						 &op->val,
						 op->bytes);
		else
			return segmented_write(ctxt,
					       op->addr.mem,
					       &op->val,
					       op->bytes);
	case OP_MEM_STR:
		return segmented_write(ctxt,
				       op->addr.mem,
				       op->data,
				       op->bytes * op->count);
	case OP_XMM:
		kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
		break;
	case OP_MM:
		kvm_write_mmx_reg(op->addr.mm, &op->mm_val);
		break;
	case OP_NONE:
		/* no writeback */
		break;
	default:
		break;
	}
	return X86EMUL_CONTINUE;
}

static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
{
	struct segmented_address addr;

	rsp_increment(ctxt, -bytes);
	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
	addr.seg = VCPU_SREG_SS;

	return segmented_write(ctxt, addr, data, bytes);
}
static int em_push(struct x86_emulate_ctxt *ctxt)
{
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
}

static int emulate_pop(struct x86_emulate_ctxt *ctxt,
		       void *dest, int len)
{
	int rc;
	struct segmented_address addr;

	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
	addr.seg = VCPU_SREG_SS;
	rc = segmented_read(ctxt, addr, dest, len);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rsp_increment(ctxt, len);
	return rc;
}

static int em_pop(struct x86_emulate_ctxt *ctxt)
{
	return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
}
static int emulate_popf(struct x86_emulate_ctxt *ctxt,
			void *dest, int len)
{
	int rc;
	unsigned long val, change_mask;
	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
	int cpl = ctxt->ops->cpl(ctxt);

	rc = emulate_pop(ctxt, &val, len);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
		      X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
		      X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
		      X86_EFLAGS_AC | X86_EFLAGS_ID;

	switch(ctxt->mode) {
	case X86EMUL_MODE_PROT64:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT16:
		if (cpl == 0)
			change_mask |= X86_EFLAGS_IOPL;
		if (cpl <= iopl)
			change_mask |= X86_EFLAGS_IF;
		break;
	case X86EMUL_MODE_VM86:
		if (iopl < 3)
			return emulate_gp(ctxt, 0);
		change_mask |= X86_EFLAGS_IF;
		break;
	default: /* real mode */
		change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
		break;
	}

	*(unsigned long *)dest =
		(ctxt->eflags & ~change_mask) | (val & change_mask);

	return rc;
}

static int em_popf(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.type = OP_REG;
	ctxt->dst.addr.reg = &ctxt->eflags;
	ctxt->dst.bytes = ctxt->op_bytes;
	return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
}
static int em_enter(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned frame_size = ctxt->src.val;
	unsigned nesting_level = ctxt->src2.val & 31;
	ulong rbp;

	if (nesting_level)
		return X86EMUL_UNHANDLEABLE;

	rbp = reg_read(ctxt, VCPU_REGS_RBP);
	rc = push(ctxt, &rbp, stack_size(ctxt));
	if (rc != X86EMUL_CONTINUE)
		return rc;
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
		      stack_mask(ctxt));
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
		      reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
		      stack_mask(ctxt));
	return X86EMUL_CONTINUE;
}

static int em_leave(struct x86_emulate_ctxt *ctxt)
{
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
		      stack_mask(ctxt));
	return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
}
static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
{
	int seg = ctxt->src2.val;

	ctxt->src.val = get_segment_selector(ctxt, seg);
	if (ctxt->op_bytes == 4) {
		rsp_increment(ctxt, -2);
		ctxt->op_bytes = 2;
	}

	return em_push(ctxt);
}
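/*
 * Worked example: for a 32-bit PUSH %fs the rsp_increment(-2)/op_bytes=2
 * pair above makes the stack pointer move by 4 in total while only the
 * low 2 bytes of the slot are written with the selector, leaving the
 * upper half of the slot untouched.
 */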
static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
{
	int seg = ctxt->src2.val;
	unsigned long selector;
	int rc;

	rc = emulate_pop(ctxt, &selector, 2);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	if (seg == VCPU_SREG_SS)
		ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
	if (ctxt->op_bytes > 2)
		rsp_increment(ctxt, ctxt->op_bytes - 2);

	rc = load_segment_descriptor(ctxt, (u16)selector, seg);
	return rc;
}

static int em_pusha(struct x86_emulate_ctxt *ctxt)
{
	unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RAX;

	while (reg <= VCPU_REGS_RDI) {
		(reg == VCPU_REGS_RSP) ?
		(ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));

		rc = em_push(ctxt);
		if (rc != X86EMUL_CONTINUE)
			return rc;

		++reg;
	}

	return rc;
}

static int em_pushf(struct x86_emulate_ctxt *ctxt)
{
	ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
	return em_push(ctxt);
}

static int em_popa(struct x86_emulate_ctxt *ctxt)
{
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RDI;
	u32 val = 0;

	while (reg >= VCPU_REGS_RAX) {
		if (reg == VCPU_REGS_RSP) {
			rsp_increment(ctxt, ctxt->op_bytes);
			--reg;
			continue;
		}

		rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			break;
		assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
		--reg;
	}
	return rc;
}
static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	int rc;
	struct desc_ptr dt;
	gva_t cs_addr;
	gva_t eip_addr;
	u16 cs, eip;

	/* TODO: Add limit checks */
	ctxt->src.val = ctxt->eflags;
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);

	ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->src.val = ctxt->_eip;
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ops->get_idt(ctxt, &dt);

	eip_addr = dt.address + (irq << 2);
	cs_addr = dt.address + (irq << 2) + 2;

	rc = linear_read_system(ctxt, cs_addr, &cs, 2);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = linear_read_system(ctxt, eip_addr, &eip, 2);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->_eip = eip;

	return rc;
}

int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
	int rc;

	invalidate_registers(ctxt);
	rc = __emulate_int_real(ctxt, irq);
	if (rc == X86EMUL_CONTINUE)
		writeback_registers(ctxt);
	return rc;
}

static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
{
	switch(ctxt->mode) {
	case X86EMUL_MODE_REAL:
		return __emulate_int_real(ctxt, irq);
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT64:
	default:
		/* Protected mode interrupts unimplemented yet */
		return X86EMUL_UNHANDLEABLE;
	}
}
static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
{
	int rc = X86EMUL_CONTINUE;
	unsigned long temp_eip = 0;
	unsigned long temp_eflags = 0;
	unsigned long cs = 0;
	unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
			     X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
			     X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
			     X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
			     X86_EFLAGS_AC | X86_EFLAGS_ID |
			     X86_EFLAGS_FIXED;
	unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
				  X86_EFLAGS_VIP;

	/* TODO: Add stack limit check */

	rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	if (temp_eip & ~0xffff)
		return emulate_gp(ctxt, 0);

	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->_eip = temp_eip;

	if (ctxt->op_bytes == 4)
		ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
	else if (ctxt->op_bytes == 2) {
		ctxt->eflags &= ~0xffff;
		ctxt->eflags |= temp_eflags;
	}

	ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
	ctxt->eflags |= X86_EFLAGS_FIXED;
	ctxt->ops->set_nmi_mask(ctxt, false);

	return rc;
}

static int em_iret(struct x86_emulate_ctxt *ctxt)
{
	switch(ctxt->mode) {
	case X86EMUL_MODE_REAL:
		return emulate_iret_real(ctxt);
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT64:
	default:
		/* iret from protected mode unimplemented yet */
		return X86EMUL_UNHANDLEABLE;
	}
}
static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned short sel;
	struct desc_struct new_desc;
	u8 cpl = ctxt->ops->cpl(ctxt);

	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);

	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
				       X86_TRANSFER_CALL_JMP,
				       &new_desc);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = assign_eip_far(ctxt, ctxt->src.val);
	/* Error handling is not implemented. */
	if (rc != X86EMUL_CONTINUE)
		return X86EMUL_UNHANDLEABLE;

	return rc;
}

static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
{
	return assign_eip_near(ctxt, ctxt->src.val);
}

static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	long int old_eip;

	old_eip = ctxt->_eip;
	rc = assign_eip_near(ctxt, ctxt->src.val);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	ctxt->src.val = old_eip;
	rc = em_push(ctxt);
	return rc;
}
static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
{
	u64 old = ctxt->dst.orig_val64;

	if (ctxt->dst.bytes == 16)
		return X86EMUL_UNHANDLEABLE;

	if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
	    ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
		*reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
		*reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
		ctxt->eflags &= ~X86_EFLAGS_ZF;
	} else {
		ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
			(u32) reg_read(ctxt, VCPU_REGS_RBX);

		ctxt->eflags |= X86_EFLAGS_ZF;
	}
	return X86EMUL_CONTINUE;
}
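/*
 * Worked example: CMPXCHG8B m64 compares EDX:EAX with the memory operand;
 * on mismatch the code above loads EDX:EAX from memory and clears ZF, on
 * match it schedules ECX:EBX for writeback to the memory operand and
 * sets ZF.
 */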
static int em_ret(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned long eip;

	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	return assign_eip_near(ctxt, eip);
}

static int em_ret_far(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned long eip, cs;
	int cpl = ctxt->ops->cpl(ctxt);
	struct desc_struct new_desc;

	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
				       X86_TRANSFER_RET,
				       &new_desc);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = assign_eip_far(ctxt, eip);
	/* Error handling is not implemented. */
	if (rc != X86EMUL_CONTINUE)
		return X86EMUL_UNHANDLEABLE;

	return rc;
}

static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
{
	int rc;

	rc = em_ret_far(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rsp_increment(ctxt, ctxt->src.val);
	return X86EMUL_CONTINUE;
}
static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
{
	/* Save real source value, then compare EAX against destination. */
	ctxt->dst.orig_val = ctxt->dst.val;
	ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
	ctxt->src.orig_val = ctxt->src.val;
	ctxt->src.val = ctxt->dst.orig_val;
	fastop(ctxt, em_cmp);

	if (ctxt->eflags & X86_EFLAGS_ZF) {
		/* Success: write back to memory; no update of EAX */
		ctxt->src.type = OP_NONE;
		ctxt->dst.val = ctxt->src.orig_val;
	} else {
		/* Failure: write the value we saw to EAX. */
		ctxt->src.type = OP_REG;
		ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
		ctxt->src.val = ctxt->dst.orig_val;
		/* Create write-cycle to dest by writing the same value */
		ctxt->dst.val = ctxt->dst.orig_val;
	}
	return X86EMUL_CONTINUE;
}

static int em_lseg(struct x86_emulate_ctxt *ctxt)
{
	int seg = ctxt->src2.val;
	unsigned short sel;
	int rc;

	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);

	rc = load_segment_descriptor(ctxt, sel, seg);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->dst.val = ctxt->src.val;
	return rc;
}

static int em_rsm(struct x86_emulate_ctxt *ctxt)
{
	if (!ctxt->ops->is_smm(ctxt))
		return emulate_ud(ctxt);

	if (ctxt->ops->leave_smm(ctxt))
		ctxt->ops->triple_fault(ctxt);

	return emulator_recalc_and_set_mode(ctxt);
}
static void
setup_syscalls_segments(struct desc_struct *cs, struct desc_struct *ss)
{
	cs->l = 0;		/* will be adjusted later */
	set_desc_base(cs, 0);	/* flat segment */
	cs->g = 1;		/* 4kb granularity */
	set_desc_limit(cs, 0xfffff);	/* 4GB limit */
	cs->type = 0x0b;	/* Read, Execute, Accessed */
	cs->s = 1;
	cs->dpl = 0;		/* will be adjusted later */
	cs->p = 1;
	cs->d = 1;
	cs->avl = 0;

	set_desc_base(ss, 0);	/* flat segment */
	set_desc_limit(ss, 0xfffff);	/* 4GB limit */
	ss->g = 1;		/* 4kb granularity */
	ss->s = 1;
	ss->type = 0x03;	/* Read/Write, Accessed */
	ss->d = 1;		/* 32bit stack segment */
	ss->dpl = 0;
	ss->p = 1;
	ss->l = 0;
	ss->avl = 0;
}

static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
{
	u32 eax, ebx, ecx, edx;

	eax = ecx = 0;
	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
	return is_guest_vendor_intel(ebx, ecx, edx);
}
static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	u32 eax, ebx, ecx, edx;

	/*
	 * syscall should always be enabled in longmode - so only become
	 * vendor specific (cpuid) if other modes are active...
	 */
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return true;

	eax = 0x00000000;
	ecx = 0x00000000;
	ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
	/*
	 * remark: Intel CPUs only support "syscall" in 64bit longmode. Also a
	 * 64bit guest with a 32bit compat-app running will #UD !! While this
	 * behaviour can be fixed (by emulating) into AMD response - CPUs of
	 * AMD can't behave like Intel.
	 */
	if (is_guest_vendor_intel(ebx, ecx, edx))
		return false;

	if (is_guest_vendor_amd(ebx, ecx, edx) ||
	    is_guest_vendor_hygon(ebx, ecx, edx))
		return true;

	/*
	 * default: (not Intel, not AMD, not Hygon), apply Intel's
	 * stricter rules...
	 */
	return false;
}
static int em_syscall(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct cs, ss;
	u64 msr_data;
	u16 cs_sel, ss_sel;
	u64 efer = 0;

	/* syscall is not available in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
	    ctxt->mode == X86EMUL_MODE_VM86)
		return emulate_ud(ctxt);

	if (!(em_syscall_is_enabled(ctxt)))
		return emulate_ud(ctxt);

	ops->get_msr(ctxt, MSR_EFER, &efer);
	if (!(efer & EFER_SCE))
		return emulate_ud(ctxt);

	setup_syscalls_segments(&cs, &ss);
	ops->get_msr(ctxt, MSR_STAR, &msr_data);
	msr_data >>= 32;
	cs_sel = (u16)(msr_data & 0xfffc);
	ss_sel = (u16)(msr_data + 8);

	if (efer & EFER_LMA) {
		cs.d = 0;
		cs.l = 1;
	}
	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

	*reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
	if (efer & EFER_LMA) {
#ifdef CONFIG_X86_64
		*reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;

		ops->get_msr(ctxt,
			     ctxt->mode == X86EMUL_MODE_PROT64 ?
			     MSR_LSTAR : MSR_CSTAR, &msr_data);
		ctxt->_eip = msr_data;

		ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
		ctxt->eflags &= ~msr_data;
		ctxt->eflags |= X86_EFLAGS_FIXED;
#endif
	} else {
		/* legacy mode */
		ops->get_msr(ctxt, MSR_STAR, &msr_data);
		ctxt->_eip = (u32)msr_data;

		ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
	}

	ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
	return X86EMUL_CONTINUE;
}
static int em_sysenter(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	struct desc_struct cs, ss;
	u64 msr_data;
	u16 cs_sel, ss_sel;
	u64 efer = 0;

	ops->get_msr(ctxt, MSR_EFER, &efer);
	/* inject #GP if in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL)
		return emulate_gp(ctxt, 0);

	/*
	 * Not recognized on AMD in compat mode (but is recognized in legacy
	 * mode).
	 */
	if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
	    && !vendor_intel(ctxt))
		return emulate_ud(ctxt);

	/* sysenter/sysexit have not been tested in 64bit mode. */
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return X86EMUL_UNHANDLEABLE;

	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
	if ((msr_data & 0xfffc) == 0x0)
		return emulate_gp(ctxt, 0);

	setup_syscalls_segments(&cs, &ss);
	ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
	cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
	ss_sel = cs_sel + 8;
	if (efer & EFER_LMA) {
		cs.d = 0;
		cs.l = 1;
	}

	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

	ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
	ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;

	ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
	*reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
							      (u32)msr_data;
	if (efer & EFER_LMA)
		ctxt->mode = X86EMUL_MODE_PROT64;

	return X86EMUL_CONTINUE;
}
2512 static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2514 const struct x86_emulate_ops *ops = ctxt->ops;
2515 struct desc_struct cs, ss;
2516 u64 msr_data, rcx, rdx;
2518 u16 cs_sel = 0, ss_sel = 0;
2520 /* inject #GP if in real mode or Virtual 8086 mode */
2521 if (ctxt->mode == X86EMUL_MODE_REAL ||
2522 ctxt->mode == X86EMUL_MODE_VM86)
2523 return emulate_gp(ctxt, 0);
2525 setup_syscalls_segments(&cs, &ss);
2527 if ((ctxt->rex_prefix & 0x8) != 0x0)
2528 usermode = X86EMUL_MODE_PROT64;
2530 usermode = X86EMUL_MODE_PROT32;
2532 rcx = reg_read(ctxt, VCPU_REGS_RCX);
2533 rdx = reg_read(ctxt, VCPU_REGS_RDX);
2537 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2538 switch (usermode) {
2539 case X86EMUL_MODE_PROT32:
2540 cs_sel = (u16)(msr_data + 16);
2541 if ((msr_data & 0xfffc) == 0x0)
2542 return emulate_gp(ctxt, 0);
2543 ss_sel = (u16)(msr_data + 24);
2547 case X86EMUL_MODE_PROT64:
2548 cs_sel = (u16)(msr_data + 32);
2549 if (msr_data == 0x0)
2550 return emulate_gp(ctxt, 0);
2551 ss_sel = cs_sel + 8;
2554 if (emul_is_noncanonical_address(rcx, ctxt) ||
2555 emul_is_noncanonical_address(rdx, ctxt))
2556 return emulate_gp(ctxt, 0);
2559 cs_sel |= SEGMENT_RPL_MASK;
2560 ss_sel |= SEGMENT_RPL_MASK;
2562 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2563 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2566 ctxt->mode = usermode;
2567 *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2569 return X86EMUL_CONTINUE;
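/*
 * The SYSENTER/SYSEXIT selectors are all derived from IA32_SYSENTER_CS
 * (call it "base"); the offsets used above follow the SDM convention:
 *
 *   SYSENTER:      CS = base,      SS = base + 8
 *   SYSEXIT (32):  CS = base + 16, SS = base + 24
 *   SYSEXIT (64):  CS = base + 32, SS = base + 40 (cs_sel + 8)
 *
 * e.g. base = 0x10 gives a 64-bit SYSEXIT CS of 0x30 | RPL 3 = 0x33.
 */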
2572 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2575 if (ctxt->mode == X86EMUL_MODE_REAL)
2577 if (ctxt->mode == X86EMUL_MODE_VM86)
2579 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
2580 return ctxt->ops->cpl(ctxt) > iopl;
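/*
 * IOPL lives in EFLAGS bits 13:12. For example, eflags = 0x3202 has
 * IOPL = (0x3202 & 0x3000) >> 12 = 3, so no CPL fails the check above;
 * with IOPL = 0, any CPL > 0 is "bad" and the TSS I/O bitmap decides.
 */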
2583 #define VMWARE_PORT_VMPORT (0x5658)
2584 #define VMWARE_PORT_VMRPC (0x5659)
2586 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2589 const struct x86_emulate_ops *ops = ctxt->ops;
2590 struct desc_struct tr_seg;
2593 u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2594 unsigned mask = (1 << len) - 1;
2598 * VMware allows access to these ports even if denied
2599 * by the TSS I/O permission bitmap. Mimic that behavior.
2601 if (enable_vmware_backdoor &&
2602 ((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC)))
2605 ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
2608 if (desc_limit_scaled(&tr_seg) < 103)
2610 base = get_desc_base(&tr_seg);
2611 #ifdef CONFIG_X86_64
2612 base |= ((u64)base3) << 32;
2614 r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true);
2615 if (r != X86EMUL_CONTINUE)
2617 if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2619 r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true);
2620 if (r != X86EMUL_CONTINUE)
2622 if ((perm >> bit_idx) & mask)
2627 static bool emulator_io_permitted(struct x86_emulate_ctxt *ctxt,
2633 if (emulator_bad_iopl(ctxt))
2634 if (!emulator_io_port_access_allowed(ctxt, port, len))
2637 ctxt->perm_ok = true;
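/*
 * Worked example of the bitmap lookup above: an access to port 0x3f8 with
 * len = 1 reads the u16 at base + 102 to find io_bitmap_ptr, then the u16
 * at base + io_bitmap_ptr + 0x3f8/8 = +0x7f; bit_idx = 0x3f8 & 7 = 0 and
 * mask = (1 << 1) - 1 = 1, so the access is refused iff bit 0 of that word
 * is set. Two bytes are read so that an access straddling a byte boundary
 * is handled with a single load.
 */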
2642 static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
2645 * Intel CPUs mask the counter and pointers in quite a strange
2646 * manner when ECX is zero, due to REP-string optimizations.
2648 #ifdef CONFIG_X86_64
2649 if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
2652 *reg_write(ctxt, VCPU_REGS_RCX) = 0;
2654 switch (ctxt->b) {
2655 case 0xa4: /* movsb */
2656 case 0xa5: /* movsd/w */
2657 *reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
2659 case 0xaa: /* stosb */
2660 case 0xab: /* stosd/w */
2661 *reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
2666 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2667 struct tss_segment_16 *tss)
2669 tss->ip = ctxt->_eip;
2670 tss->flag = ctxt->eflags;
2671 tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
2672 tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
2673 tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
2674 tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
2675 tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
2676 tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
2677 tss->si = reg_read(ctxt, VCPU_REGS_RSI);
2678 tss->di = reg_read(ctxt, VCPU_REGS_RDI);
2680 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2681 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2682 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2683 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2684 tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
2687 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2688 struct tss_segment_16 *tss)
2693 ctxt->_eip = tss->ip;
2694 ctxt->eflags = tss->flag | 2;
2695 *reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
2696 *reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
2697 *reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
2698 *reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
2699 *reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
2700 *reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
2701 *reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
2702 *reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
2705 * SDM says that segment selectors are loaded before segment descriptors.
2708 set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
2709 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2710 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2711 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2712 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2717 * Now load the segment descriptors. If a fault happens at this stage,
2718 * it is handled in the context of the new task.
2720 ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
2721 X86_TRANSFER_TASK_SWITCH, NULL);
2722 if (ret != X86EMUL_CONTINUE)
2724 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2725 X86_TRANSFER_TASK_SWITCH, NULL);
2726 if (ret != X86EMUL_CONTINUE)
2728 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2729 X86_TRANSFER_TASK_SWITCH, NULL);
2730 if (ret != X86EMUL_CONTINUE)
2732 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2733 X86_TRANSFER_TASK_SWITCH, NULL);
2734 if (ret != X86EMUL_CONTINUE)
2736 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2737 X86_TRANSFER_TASK_SWITCH, NULL);
2738 if (ret != X86EMUL_CONTINUE)
2741 return X86EMUL_CONTINUE;
2744 static int task_switch_16(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
2745 ulong old_tss_base, struct desc_struct *new_desc)
2747 struct tss_segment_16 tss_seg;
2749 u32 new_tss_base = get_desc_base(new_desc);
2751 ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2752 if (ret != X86EMUL_CONTINUE)
2755 save_state_to_tss16(ctxt, &tss_seg);
2757 ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2758 if (ret != X86EMUL_CONTINUE)
2761 ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
2762 if (ret != X86EMUL_CONTINUE)
2765 if (old_tss_sel != 0xffff) {
2766 tss_seg.prev_task_link = old_tss_sel;
2768 ret = linear_write_system(ctxt, new_tss_base,
2769 &tss_seg.prev_task_link,
2770 sizeof(tss_seg.prev_task_link));
2771 if (ret != X86EMUL_CONTINUE)
2775 return load_state_from_tss16(ctxt, &tss_seg);
2778 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
2779 struct tss_segment_32 *tss)
2781 /* CR3 and the LDT selector are intentionally not saved */
2782 tss->eip = ctxt->_eip;
2783 tss->eflags = ctxt->eflags;
2784 tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
2785 tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
2786 tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
2787 tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
2788 tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
2789 tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
2790 tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
2791 tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
2793 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2794 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2795 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2796 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2797 tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
2798 tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
2801 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2802 struct tss_segment_32 *tss)
2807 if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
2808 return emulate_gp(ctxt, 0);
2809 ctxt->_eip = tss->eip;
2810 ctxt->eflags = tss->eflags | 2;
2812 /* General purpose registers */
2813 *reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
2814 *reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
2815 *reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
2816 *reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
2817 *reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
2818 *reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
2819 *reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
2820 *reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
2823 * SDM says that segment selectors are loaded before segment
2824 * descriptors. This is important because CPL checks will use CS.RPL.
2827 set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
2828 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2829 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2830 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2831 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2832 set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
2833 set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
2836 * If we're switching between Protected Mode and VM86, we need to make
2837 * sure to update the mode before loading the segment descriptors so
2838 * that the selectors are interpreted correctly.
2840 if (ctxt->eflags & X86_EFLAGS_VM) {
2841 ctxt->mode = X86EMUL_MODE_VM86;
2844 ctxt->mode = X86EMUL_MODE_PROT32;
2849 * Now load the segment descriptors. If a fault happens at this stage,
2850 * it is handled in the context of the new task.
2852 ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
2853 cpl, X86_TRANSFER_TASK_SWITCH, NULL);
2854 if (ret != X86EMUL_CONTINUE)
2856 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2857 X86_TRANSFER_TASK_SWITCH, NULL);
2858 if (ret != X86EMUL_CONTINUE)
2860 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2861 X86_TRANSFER_TASK_SWITCH, NULL);
2862 if (ret != X86EMUL_CONTINUE)
2864 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2865 X86_TRANSFER_TASK_SWITCH, NULL);
2866 if (ret != X86EMUL_CONTINUE)
2868 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2869 X86_TRANSFER_TASK_SWITCH, NULL);
2870 if (ret != X86EMUL_CONTINUE)
2872 ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
2873 X86_TRANSFER_TASK_SWITCH, NULL);
2874 if (ret != X86EMUL_CONTINUE)
2876 ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
2877 X86_TRANSFER_TASK_SWITCH, NULL);
2882 static int task_switch_32(struct x86_emulate_ctxt *ctxt, u16 old_tss_sel,
2883 ulong old_tss_base, struct desc_struct *new_desc)
2885 struct tss_segment_32 tss_seg;
2887 u32 new_tss_base = get_desc_base(new_desc);
2888 u32 eip_offset = offsetof(struct tss_segment_32, eip);
2889 u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
2891 ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
2892 if (ret != X86EMUL_CONTINUE)
2895 save_state_to_tss32(ctxt, &tss_seg);
2897 /* Only GP registers and segment selectors are saved */
2898 ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
2899 ldt_sel_offset - eip_offset);
2900 if (ret != X86EMUL_CONTINUE)
2903 ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
2904 if (ret != X86EMUL_CONTINUE)
2907 if (old_tss_sel != 0xffff) {
2908 tss_seg.prev_task_link = old_tss_sel;
2910 ret = linear_write_system(ctxt, new_tss_base,
2911 &tss_seg.prev_task_link,
2912 sizeof(tss_seg.prev_task_link));
2913 if (ret != X86EMUL_CONTINUE)
2917 return load_state_from_tss32(ctxt, &tss_seg);
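/*
 * With the architectural 32-bit TSS layout, offsetof(eip) is 0x20 and
 * offsetof(ldt_selector) is 0x60, so the partial write-back above covers
 * exactly 0x40 bytes: EIP, EFLAGS, the eight GPRs and the six segment
 * selectors, leaving CR3, the privilege-level stacks and the fields from
 * the LDT selector onwards untouched.
 */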
2920 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2921 u16 tss_selector, int idt_index, int reason,
2922 bool has_error_code, u32 error_code)
2924 const struct x86_emulate_ops *ops = ctxt->ops;
2925 struct desc_struct curr_tss_desc, next_tss_desc;
2927 u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
2928 ulong old_tss_base =
2929 ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
2931 ulong desc_addr, dr7;
2933 /* FIXME: old_tss_base == ~0 ? */
2935 ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
2936 if (ret != X86EMUL_CONTINUE)
2938 ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
2939 if (ret != X86EMUL_CONTINUE)
2942 /* FIXME: check that next_tss_desc is tss */
2945 * Check privileges. The three cases are task switch caused by...
2947 * 1. jmp/call/int to task gate: Check against DPL of the task gate
2948 * 2. Exception/IRQ/iret: No check is performed
2949 * 3. jmp/call to TSS/task-gate: No check is performed since the
2950 * hardware checks it before exiting.
2952 if (reason == TASK_SWITCH_GATE) {
2953 if (idt_index != -1) {
2954 /* Software interrupts */
2955 struct desc_struct task_gate_desc;
2958 ret = read_interrupt_descriptor(ctxt, idt_index,
2959 &task_gate_desc);
2960 if (ret != X86EMUL_CONTINUE)
2963 dpl = task_gate_desc.dpl;
2964 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
2965 return emulate_gp(ctxt, (idt_index << 3) | 0x2);
2969 desc_limit = desc_limit_scaled(&next_tss_desc);
2970 if (!next_tss_desc.p ||
2971 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
2972 desc_limit < 0x2b)) {
2973 return emulate_ts(ctxt, tss_selector & 0xfffc);
2976 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
2977 curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
2978 write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
2981 if (reason == TASK_SWITCH_IRET)
2982 ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
2984 /* Set the back link to the previous task only if the NT bit is set in
2985 EFLAGS; note that old_tss_sel is not used after this point. */
2986 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
2987 old_tss_sel = 0xffff;
2989 if (next_tss_desc.type & 8)
2990 ret = task_switch_32(ctxt, old_tss_sel, old_tss_base, &next_tss_desc);
2992 ret = task_switch_16(ctxt, old_tss_sel,
2993 old_tss_base, &next_tss_desc);
2994 if (ret != X86EMUL_CONTINUE)
2997 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
2998 ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
3000 if (reason != TASK_SWITCH_IRET) {
3001 next_tss_desc.type |= (1 << 1); /* set busy flag */
3002 write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
3005 ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS);
3006 ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
3008 if (has_error_code) {
3009 ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
3010 ctxt->lock_prefix = 0;
3011 ctxt->src.val = (unsigned long) error_code;
3012 ret = em_push(ctxt);
3015 ops->get_dr(ctxt, 7, &dr7);
3016 ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
3021 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
3022 u16 tss_selector, int idt_index, int reason,
3023 bool has_error_code, u32 error_code)
3027 invalidate_registers(ctxt);
3028 ctxt->_eip = ctxt->eip;
3029 ctxt->dst.type = OP_NONE;
3031 rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
3032 has_error_code, error_code);
3034 if (rc == X86EMUL_CONTINUE) {
3035 ctxt->eip = ctxt->_eip;
3036 writeback_registers(ctxt);
3039 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3042 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
3045 int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
3047 register_address_increment(ctxt, reg, df * op->bytes);
3048 op->addr.mem.ea = register_address(ctxt, reg);
3051 static int em_das(struct x86_emulate_ctxt *ctxt)
3054 bool af, cf, old_cf;
3056 cf = ctxt->eflags & X86_EFLAGS_CF;
3062 af = ctxt->eflags & X86_EFLAGS_AF;
3063 if ((al & 0x0f) > 9 || af) {
3065 cf = old_cf | (al >= 250);
3070 if (old_al > 0x99 || old_cf) {
3076 /* Set PF, ZF, SF */
3077 ctxt->src.type = OP_IMM;
3078 ctxt->src.val = 0;
3079 ctxt->src.bytes = 1;
3080 fastop(ctxt, em_or);
3081 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
3083 ctxt->eflags |= X86_EFLAGS_CF;
3085 ctxt->eflags |= X86_EFLAGS_AF;
3086 return X86EMUL_CONTINUE;
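/*
 * DAS worked example: AL = 0xee with CF = AF = 0. The low nibble 0xe > 9,
 * so AL becomes 0xe8 and AF is set (0xe8 < 250 leaves CF clear); then
 * old_al 0xee > 0x99, so AL becomes 0xe8 - 0x60 = 0x88 and CF is set.
 */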
3089 static int em_aam(struct x86_emulate_ctxt *ctxt)
3093 if (ctxt->src.val == 0)
3094 return emulate_de(ctxt);
3096 al = ctxt->dst.val & 0xff;
3097 ah = al / ctxt->src.val;
3098 al %= ctxt->src.val;
3100 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
3102 /* Set PF, ZF, SF */
3103 ctxt->src.type = OP_IMM;
3104 ctxt->src.val = 0;
3105 ctxt->src.bytes = 1;
3106 fastop(ctxt, em_or);
3108 return X86EMUL_CONTINUE;
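/*
 * AAM worked example with the default base of 10: AL = 53 (0x35) yields
 * AH = 53 / 10 = 5 and AL = 53 % 10 = 3, i.e. AX = 0x0503, with PF/ZF/SF
 * recomputed from the new AL by the em_or fastop above.
 */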
3111 static int em_aad(struct x86_emulate_ctxt *ctxt)
3113 u8 al = ctxt->dst.val & 0xff;
3114 u8 ah = (ctxt->dst.val >> 8) & 0xff;
3116 al = (al + (ah * ctxt->src.val)) & 0xff;
3118 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
3120 /* Set PF, ZF, SF */
3121 ctxt->src.type = OP_IMM;
3122 ctxt->src.val = 0;
3123 ctxt->src.bytes = 1;
3124 fastop(ctxt, em_or);
3126 return X86EMUL_CONTINUE;
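/*
 * AAD is the inverse of AAM: with AX = 0x0503 and the default base of 10,
 * AL = (3 + 5 * 10) & 0xff = 0x35 and AH is cleared, giving AX = 0x0035.
 */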
3129 static int em_call(struct x86_emulate_ctxt *ctxt)
3132 long rel = ctxt->src.val;
3134 ctxt->src.val = (unsigned long)ctxt->_eip;
3135 rc = jmp_rel(ctxt, rel);
3136 if (rc != X86EMUL_CONTINUE)
3138 return em_push(ctxt);
3141 static int em_call_far(struct x86_emulate_ctxt *ctxt)
3146 struct desc_struct old_desc, new_desc;
3147 const struct x86_emulate_ops *ops = ctxt->ops;
3148 int cpl = ctxt->ops->cpl(ctxt);
3149 enum x86emul_mode prev_mode = ctxt->mode;
3151 old_eip = ctxt->_eip;
3152 ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3154 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3155 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
3156 X86_TRANSFER_CALL_JMP, &new_desc);
3157 if (rc != X86EMUL_CONTINUE)
3160 rc = assign_eip_far(ctxt, ctxt->src.val);
3161 if (rc != X86EMUL_CONTINUE)
3164 ctxt->src.val = old_cs;
3165 rc = em_push(ctxt);
3166 if (rc != X86EMUL_CONTINUE)
3169 ctxt->src.val = old_eip;
3170 rc = em_push(ctxt);
3171 /* If we failed, we tainted the memory, but the very least we should
3172 restore is the old eip. */
3173 if (rc != X86EMUL_CONTINUE) {
3174 pr_warn_once("faulting far call emulation tainted memory\n");
3179 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3180 ctxt->mode = prev_mode;
3185 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
3190 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
3191 if (rc != X86EMUL_CONTINUE)
3193 rc = assign_eip_near(ctxt, eip);
3194 if (rc != X86EMUL_CONTINUE)
3196 rsp_increment(ctxt, ctxt->src.val);
3197 return X86EMUL_CONTINUE;
3200 static int em_xchg(struct x86_emulate_ctxt *ctxt)
3202 /* Write back the register source. */
3203 ctxt->src.val = ctxt->dst.val;
3204 write_register_operand(&ctxt->src);
3206 /* Write back the memory destination with implicit LOCK prefix. */
3207 ctxt->dst.val = ctxt->src.orig_val;
3208 ctxt->lock_prefix = 1;
3209 return X86EMUL_CONTINUE;
3212 static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
3214 ctxt->dst.val = ctxt->src2.val;
3215 return fastop(ctxt, em_imul);
3218 static int em_cwd(struct x86_emulate_ctxt *ctxt)
3220 ctxt->dst.type = OP_REG;
3221 ctxt->dst.bytes = ctxt->src.bytes;
3222 ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3223 ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3225 return X86EMUL_CONTINUE;
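/*
 * The expression above is a branchless sign-fill: shifting the sign bit
 * down to bit 0 gives 0 or 1, so ~((sign) - 1) is all-ones when the sign
 * bit is set and 0 otherwise. E.g. CWD with AX = 0x8000 stores 0xffff in
 * DX, while AX = 0x7fff stores 0.
 */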
3228 static int em_rdpid(struct x86_emulate_ctxt *ctxt)
3232 if (!ctxt->ops->guest_has_rdpid(ctxt))
3233 return emulate_ud(ctxt);
3235 ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux);
3236 ctxt->dst.val = tsc_aux;
3237 return X86EMUL_CONTINUE;
3240 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
3244 ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
3245 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
3246 *reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
3247 return X86EMUL_CONTINUE;
3250 static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
3254 if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
3255 return emulate_gp(ctxt, 0);
3256 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
3257 *reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
3258 return X86EMUL_CONTINUE;
3261 static int em_mov(struct x86_emulate_ctxt *ctxt)
3263 memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
3264 return X86EMUL_CONTINUE;
3267 static int em_movbe(struct x86_emulate_ctxt *ctxt)
3271 if (!ctxt->ops->guest_has_movbe(ctxt))
3272 return emulate_ud(ctxt);
3274 switch (ctxt->op_bytes) {
3277 * From MOVBE definition: "...When the operand size is 16 bits,
3278 * the upper word of the destination register remains unchanged ..."
3281 * Both casting ->valptr and ->val to u16 breaks strict aliasing
3282 * rules, so we have to do the operation almost by hand.
3284 tmp = (u16)ctxt->src.val;
3285 ctxt->dst.val &= ~0xffffUL;
3286 ctxt->dst.val |= (unsigned long)swab16(tmp);
3289 ctxt->dst.val = swab32((u32)ctxt->src.val);
3292 ctxt->dst.val = swab64(ctxt->src.val);
3297 return X86EMUL_CONTINUE;
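/*
 * MOVBE examples: a 32-bit operand of 0x12345678 becomes 0x78563412 via
 * swab32(); in the 16-bit case only the low word is swapped (0x1234 ->
 * 0x3412) and the upper bits of the destination register are preserved,
 * as required by the definition quoted above.
 */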
3300 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
3302 int cr_num = ctxt->modrm_reg;
3305 if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
3306 return emulate_gp(ctxt, 0);
3308 /* Disable writeback. */
3309 ctxt->dst.type = OP_NONE;
3313 * CR0 write might have updated CR0.PE and/or CR0.PG
3314 * which can affect the cpu's execution mode.
3316 r = emulator_recalc_and_set_mode(ctxt);
3317 if (r != X86EMUL_CONTINUE)
3321 return X86EMUL_CONTINUE;
3324 static int em_dr_write(struct x86_emulate_ctxt *ctxt)
3328 if (ctxt->mode == X86EMUL_MODE_PROT64)
3329 val = ctxt->src.val & ~0ULL;
3331 val = ctxt->src.val & ~0U;
3333 /* #UD condition is already handled. */
3334 if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
3335 return emulate_gp(ctxt, 0);
3337 /* Disable writeback. */
3338 ctxt->dst.type = OP_NONE;
3339 return X86EMUL_CONTINUE;
3342 static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
3344 u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3348 msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
3349 | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
3350 r = ctxt->ops->set_msr_with_filter(ctxt, msr_index, msr_data);
3352 if (r == X86EMUL_PROPAGATE_FAULT)
3353 return emulate_gp(ctxt, 0);
3358 static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
3360 u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
3364 r = ctxt->ops->get_msr_with_filter(ctxt, msr_index, &msr_data);
3366 if (r == X86EMUL_PROPAGATE_FAULT)
3367 return emulate_gp(ctxt, 0);
3369 if (r == X86EMUL_CONTINUE) {
3370 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
3371 *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3376 static int em_store_sreg(struct x86_emulate_ctxt *ctxt, int segment)
3378 if (segment > VCPU_SREG_GS &&
3379 (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3380 ctxt->ops->cpl(ctxt) > 0)
3381 return emulate_gp(ctxt, 0);
3383 ctxt->dst.val = get_segment_selector(ctxt, segment);
3384 if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
3385 ctxt->dst.bytes = 2;
3386 return X86EMUL_CONTINUE;
3389 static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
3391 if (ctxt->modrm_reg > VCPU_SREG_GS)
3392 return emulate_ud(ctxt);
3394 return em_store_sreg(ctxt, ctxt->modrm_reg);
3397 static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
3399 u16 sel = ctxt->src.val;
3401 if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3402 return emulate_ud(ctxt);
3404 if (ctxt->modrm_reg == VCPU_SREG_SS)
3405 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3407 /* Disable writeback. */
3408 ctxt->dst.type = OP_NONE;
3409 return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3412 static int em_sldt(struct x86_emulate_ctxt *ctxt)
3414 return em_store_sreg(ctxt, VCPU_SREG_LDTR);
3417 static int em_lldt(struct x86_emulate_ctxt *ctxt)
3419 u16 sel = ctxt->src.val;
3421 /* Disable writeback. */
3422 ctxt->dst.type = OP_NONE;
3423 return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
3426 static int em_str(struct x86_emulate_ctxt *ctxt)
3428 return em_store_sreg(ctxt, VCPU_SREG_TR);
3431 static int em_ltr(struct x86_emulate_ctxt *ctxt)
3433 u16 sel = ctxt->src.val;
3435 /* Disable writeback. */
3436 ctxt->dst.type = OP_NONE;
3437 return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
3440 static int em_invlpg(struct x86_emulate_ctxt *ctxt)
3445 rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
3446 if (rc == X86EMUL_CONTINUE)
3447 ctxt->ops->invlpg(ctxt, linear);
3448 /* Disable writeback. */
3449 ctxt->dst.type = OP_NONE;
3450 return X86EMUL_CONTINUE;
3453 static int em_clts(struct x86_emulate_ctxt *ctxt)
3457 cr0 = ctxt->ops->get_cr(ctxt, 0);
3458 cr0 &= ~X86_CR0_TS;
3459 ctxt->ops->set_cr(ctxt, 0, cr0);
3460 return X86EMUL_CONTINUE;
3463 static int em_hypercall(struct x86_emulate_ctxt *ctxt)
3465 int rc = ctxt->ops->fix_hypercall(ctxt);
3467 if (rc != X86EMUL_CONTINUE)
3470 /* Let the processor re-execute the fixed hypercall */
3471 ctxt->_eip = ctxt->eip;
3472 /* Disable writeback. */
3473 ctxt->dst.type = OP_NONE;
3474 return X86EMUL_CONTINUE;
3477 static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
3478 void (*get)(struct x86_emulate_ctxt *ctxt,
3479 struct desc_ptr *ptr))
3481 struct desc_ptr desc_ptr;
3483 if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3484 ctxt->ops->cpl(ctxt) > 0)
3485 return emulate_gp(ctxt, 0);
3487 if (ctxt->mode == X86EMUL_MODE_PROT64)
3489 get(ctxt, &desc_ptr);
3490 if (ctxt->op_bytes == 2) {
3492 desc_ptr.address &= 0x00ffffff;
3494 /* Disable writeback. */
3495 ctxt->dst.type = OP_NONE;
3496 return segmented_write_std(ctxt, ctxt->dst.addr.mem,
3497 &desc_ptr, 2 + ctxt->op_bytes);
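/*
 * The stored image is the 2-byte limit followed by the linear base, as
 * hardware SGDT/SIDT produce (10 bytes in 64-bit mode, where op_bytes is
 * forced to 8); with a 16-bit operand size the base is truncated to 24
 * bits by the mask above.
 */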
3500 static int em_sgdt(struct x86_emulate_ctxt *ctxt)
3502 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
3505 static int em_sidt(struct x86_emulate_ctxt *ctxt)
3507 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
3510 static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3512 struct desc_ptr desc_ptr;
3515 if (ctxt->mode == X86EMUL_MODE_PROT64)
3517 rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3518 &desc_ptr.size, &desc_ptr.address,
3519 ctxt->op_bytes);
3520 if (rc != X86EMUL_CONTINUE)
3522 if (ctxt->mode == X86EMUL_MODE_PROT64 &&
3523 emul_is_noncanonical_address(desc_ptr.address, ctxt))
3524 return emulate_gp(ctxt, 0);
3526 ctxt->ops->set_gdt(ctxt, &desc_ptr);
3528 ctxt->ops->set_idt(ctxt, &desc_ptr);
3529 /* Disable writeback. */
3530 ctxt->dst.type = OP_NONE;
3531 return X86EMUL_CONTINUE;
3534 static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3536 return em_lgdt_lidt(ctxt, true);
3539 static int em_lidt(struct x86_emulate_ctxt *ctxt)
3541 return em_lgdt_lidt(ctxt, false);
3544 static int em_smsw(struct x86_emulate_ctxt *ctxt)
3546 if ((ctxt->ops->get_cr(ctxt, 4) & X86_CR4_UMIP) &&
3547 ctxt->ops->cpl(ctxt) > 0)
3548 return emulate_gp(ctxt, 0);
3550 if (ctxt->dst.type == OP_MEM)
3551 ctxt->dst.bytes = 2;
3552 ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3553 return X86EMUL_CONTINUE;
3556 static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3558 ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3559 | (ctxt->src.val & 0x0f));
3560 ctxt->dst.type = OP_NONE;
3561 return X86EMUL_CONTINUE;
3564 static int em_loop(struct x86_emulate_ctxt *ctxt)
3566 int rc = X86EMUL_CONTINUE;
3568 register_address_increment(ctxt, VCPU_REGS_RCX, -1);
3569 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3570 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3571 rc = jmp_rel(ctxt, ctxt->src.val);
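/*
 * 0xe0/0xe1 are LOOPNE/LOOPE; XORing with 0x5 maps them to condition
 * nibbles 5 (NZ) and 4 (Z) for test_cc(), which keys off the low four
 * bits, while plain LOOP (0xe2) is special-cased to always take the jump.
 */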
3576 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3578 int rc = X86EMUL_CONTINUE;
3580 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3581 rc = jmp_rel(ctxt, ctxt->src.val);
3586 static int em_in(struct x86_emulate_ctxt *ctxt)
3588 if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
3589 &ctxt->dst.val))
3590 return X86EMUL_IO_NEEDED;
3592 return X86EMUL_CONTINUE;
3595 static int em_out(struct x86_emulate_ctxt *ctxt)
3597 ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
3598 &ctxt->src.val, 1);
3599 /* Disable writeback. */
3600 ctxt->dst.type = OP_NONE;
3601 return X86EMUL_CONTINUE;
3604 static int em_cli(struct x86_emulate_ctxt *ctxt)
3606 if (emulator_bad_iopl(ctxt))
3607 return emulate_gp(ctxt, 0);
3609 ctxt->eflags &= ~X86_EFLAGS_IF;
3610 return X86EMUL_CONTINUE;
3613 static int em_sti(struct x86_emulate_ctxt *ctxt)
3615 if (emulator_bad_iopl(ctxt))
3616 return emulate_gp(ctxt, 0);
3618 ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3619 ctxt->eflags |= X86_EFLAGS_IF;
3620 return X86EMUL_CONTINUE;
3623 static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3625 u32 eax, ebx, ecx, edx;
3628 ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr);
3629 if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
3630 ctxt->ops->cpl(ctxt)) {
3631 return emulate_gp(ctxt, 0);
3634 eax = reg_read(ctxt, VCPU_REGS_RAX);
3635 ecx = reg_read(ctxt, VCPU_REGS_RCX);
3636 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, false);
3637 *reg_write(ctxt, VCPU_REGS_RAX) = eax;
3638 *reg_write(ctxt, VCPU_REGS_RBX) = ebx;
3639 *reg_write(ctxt, VCPU_REGS_RCX) = ecx;
3640 *reg_write(ctxt, VCPU_REGS_RDX) = edx;
3641 return X86EMUL_CONTINUE;
3644 static int em_sahf(struct x86_emulate_ctxt *ctxt)
3648 flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
3650 flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
3652 ctxt->eflags &= ~0xffUL;
3653 ctxt->eflags |= flags | X86_EFLAGS_FIXED;
3654 return X86EMUL_CONTINUE;
3657 static int em_lahf(struct x86_emulate_ctxt *ctxt)
3659 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
3660 *reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
3661 return X86EMUL_CONTINUE;
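/*
 * LAHF/SAHF move the low EFLAGS byte through AH, laid out (bit 7..0) as
 * SF:ZF:0:AF:0:PF:1:CF. E.g. AH = 0x81 after LAHF means SF and CF were
 * set; SAHF of the same value restores exactly those five status flags,
 * with bit 1 forced on (X86_EFLAGS_FIXED).
 */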
3664 static int em_bswap(struct x86_emulate_ctxt *ctxt)
3666 switch (ctxt->op_bytes) {
3667 #ifdef CONFIG_X86_64
3669 asm("bswap %0" : "+r"(ctxt->dst.val));
3673 asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
3676 return X86EMUL_CONTINUE;
3679 static int em_clflush(struct x86_emulate_ctxt *ctxt)
3681 /* emulating clflush regardless of cpuid */
3682 return X86EMUL_CONTINUE;
3685 static int em_clflushopt(struct x86_emulate_ctxt *ctxt)
3687 /* emulating clflushopt regardless of cpuid */
3688 return X86EMUL_CONTINUE;
3691 static int em_movsxd(struct x86_emulate_ctxt *ctxt)
3693 ctxt->dst.val = (s32) ctxt->src.val;
3694 return X86EMUL_CONTINUE;
3697 static int check_fxsr(struct x86_emulate_ctxt *ctxt)
3699 if (!ctxt->ops->guest_has_fxsr(ctxt))
3700 return emulate_ud(ctxt);
3702 if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
3703 return emulate_nm(ctxt);
3706 * Don't emulate a case that should never be hit, instead of working
3707 * around a lack of fxsave64/fxrstor64 on old compilers.
3709 if (ctxt->mode >= X86EMUL_MODE_PROT64)
3710 return X86EMUL_UNHANDLEABLE;
3712 return X86EMUL_CONTINUE;
3716 * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but does save
3717 * and restore MXCSR.
3719 static size_t __fxstate_size(int nregs)
3721 return offsetof(struct fxregs_state, xmm_space[0]) + nregs * 16;
3724 static inline size_t fxstate_size(struct x86_emulate_ctxt *ctxt)
3727 if (ctxt->mode == X86EMUL_MODE_PROT64)
3728 return __fxstate_size(16);
3730 cr4_osfxsr = ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR;
3731 return __fxstate_size(cr4_osfxsr ? 8 : 0);
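/*
 * With the architectural FXSAVE layout, xmm_space starts at byte 160, so
 * __fxstate_size() evaluates to 160 (no XMM registers), 288 (XMM0-7,
 * legacy mode with CR4.OSFXSR set) or 416 bytes (XMM0-15, 64-bit mode).
 */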
3735 * FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
3738 * - like (1), but FIP and FDP are only 16 bit. At least Intel CPUs
3739 * preserve whole 32 bit values, though, so (1) and (2) are the same wrt. the emulation.
3741 * 3) 64-bit mode with REX.W prefix
3742 * - like (2), but XMM 8-15 are being saved and restored
3743 * 4) 64-bit mode without REX.W prefix
3744 * - like (3), but FIP and FDP are 64 bit
3746 * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
3747 * desired result. (4) is not emulated.
3749 * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
3750 * and FPU DS) should match.
3752 static int em_fxsave(struct x86_emulate_ctxt *ctxt)
3754 struct fxregs_state fx_state;
3757 rc = check_fxsr(ctxt);
3758 if (rc != X86EMUL_CONTINUE)
3763 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
3767 if (rc != X86EMUL_CONTINUE)
3770 return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state,
3771 fxstate_size(ctxt));
3775 * FXRSTOR might restore XMM registers not provided by the guest. Fill
3776 * in the host registers (via FXSAVE) instead, so they won't be modified.
3777 * (preemption has to stay disabled until FXRSTOR).
3779 * Use noinline so the large local fxregs_state buffer is not inlined into
3780 * callers, keeping the stack small for the other functions they call.
3781 static noinline int fxregs_fixup(struct fxregs_state *fx_state,
3782 const size_t used_size)
3784 struct fxregs_state fx_tmp;
3787 rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp));
3788 memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size,
3789 __fxstate_size(16) - used_size);
3794 static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
3796 struct fxregs_state fx_state;
3800 rc = check_fxsr(ctxt);
3801 if (rc != X86EMUL_CONTINUE)
3804 size = fxstate_size(ctxt);
3805 rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
3806 if (rc != X86EMUL_CONTINUE)
3811 if (size < __fxstate_size(16)) {
3812 rc = fxregs_fixup(&fx_state, size);
3813 if (rc != X86EMUL_CONTINUE)
3817 if (fx_state.mxcsr >> 16) {
3818 rc = emulate_gp(ctxt, 0);
3822 if (rc == X86EMUL_CONTINUE)
3823 rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
3831 static int em_xsetbv(struct x86_emulate_ctxt *ctxt)
3835 if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE))
3836 return emulate_ud(ctxt);
3838 eax = reg_read(ctxt, VCPU_REGS_RAX);
3839 edx = reg_read(ctxt, VCPU_REGS_RDX);
3840 ecx = reg_read(ctxt, VCPU_REGS_RCX);
3842 if (ctxt->ops->set_xcr(ctxt, ecx, ((u64)edx << 32) | eax))
3843 return emulate_gp(ctxt, 0);
3845 return X86EMUL_CONTINUE;
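/*
 * XSETBV composes the 64-bit XCR value from EDX:EAX, with ECX selecting
 * the register. E.g. ECX = 0, EDX = 0, EAX = 0x7 requests XCR0 = 7,
 * i.e. x87 + SSE + AVX state enabled.
 */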
3848 static bool valid_cr(int nr)
3860 static int check_cr_access(struct x86_emulate_ctxt *ctxt)
3862 if (!valid_cr(ctxt->modrm_reg))
3863 return emulate_ud(ctxt);
3865 return X86EMUL_CONTINUE;
3868 static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
3872 ctxt->ops->get_dr(ctxt, 7, &dr7);
3874 return dr7 & DR7_GD;
3877 static int check_dr_read(struct x86_emulate_ctxt *ctxt)
3879 int dr = ctxt->modrm_reg;
3883 return emulate_ud(ctxt);
3885 cr4 = ctxt->ops->get_cr(ctxt, 4);
3886 if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
3887 return emulate_ud(ctxt);
3889 if (check_dr7_gd(ctxt)) {
3892 ctxt->ops->get_dr(ctxt, 6, &dr6);
3893 dr6 &= ~DR_TRAP_BITS;
3894 dr6 |= DR6_BD | DR6_ACTIVE_LOW;
3895 ctxt->ops->set_dr(ctxt, 6, dr6);
3896 return emulate_db(ctxt);
3899 return X86EMUL_CONTINUE;
3902 static int check_dr_write(struct x86_emulate_ctxt *ctxt)
3904 u64 new_val = ctxt->src.val64;
3905 int dr = ctxt->modrm_reg;
3907 if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
3908 return emulate_gp(ctxt, 0);
3910 return check_dr_read(ctxt);
3913 static int check_svme(struct x86_emulate_ctxt *ctxt)
3917 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3919 if (!(efer & EFER_SVME))
3920 return emulate_ud(ctxt);
3922 return X86EMUL_CONTINUE;
3925 static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
3927 u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
3929 /* Valid physical address? */
3930 if (rax & 0xffff000000000000ULL)
3931 return emulate_gp(ctxt, 0);
3933 return check_svme(ctxt);
3936 static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
3938 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3940 if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
3941 return emulate_gp(ctxt, 0);
3943 return X86EMUL_CONTINUE;
3946 static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
3948 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3949 u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
3952 * VMware allows access to these Pseudo-PMCs even when read via RDPMC
3953 * in Ring3 when CR4.PCE=0.
3955 if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx))
3956 return X86EMUL_CONTINUE;
3959 * If CR4.PCE is set, the SDM requires CPL=0 or CR0.PE=0. The CR0.PE
3960 * check, however, is unnecessary because CPL is always 0 outside
3961 * protected mode.
3963 if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
3964 ctxt->ops->check_pmc(ctxt, rcx))
3965 return emulate_gp(ctxt, 0);
3967 return X86EMUL_CONTINUE;
3970 static int check_perm_in(struct x86_emulate_ctxt *ctxt)
3972 ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
3973 if (!emulator_io_permitted(ctxt, ctxt->src.val, ctxt->dst.bytes))
3974 return emulate_gp(ctxt, 0);
3976 return X86EMUL_CONTINUE;
3979 static int check_perm_out(struct x86_emulate_ctxt *ctxt)
3981 ctxt->src.bytes = min(ctxt->src.bytes, 4u);
3982 if (!emulator_io_permitted(ctxt, ctxt->dst.val, ctxt->src.bytes))
3983 return emulate_gp(ctxt, 0);
3985 return X86EMUL_CONTINUE;
3988 #define D(_y) { .flags = (_y) }
3989 #define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
3990 #define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
3991 .intercept = x86_intercept_##_i, .check_perm = (_p) }
3992 #define N D(NotImpl)
3993 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
3994 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
3995 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
3996 #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
3997 #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
3998 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
3999 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
4000 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
4001 #define II(_f, _e, _i) \
4002 { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
4003 #define IIP(_f, _e, _i, _p) \
4004 { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
4005 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4006 #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
4008 #define D2bv(_f) D((_f) | ByteOp), D(_f)
4009 #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
4010 #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
4011 #define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
4012 #define I2bvIP(_f, _e, _i, _p) \
4013 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
4015 #define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
4016 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
4017 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
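/*
 * F6ALU() emits the six classic ALU encodings in opcode order: r/m,reg and
 * reg,r/m (byte and word forms, via F2bv) plus AL,imm8 and rAX,imm. For
 * example, F6ALU(Lock, em_add) in the opcode table below fills opcodes
 * 0x00-0x05 with the six ADD forms.
 */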
4019 static const struct opcode group7_rm0[] = {
4021 I(SrcNone | Priv | EmulateOnUD, em_hypercall),
4025 static const struct opcode group7_rm1[] = {
4026 DI(SrcNone | Priv, monitor),
4027 DI(SrcNone | Priv, mwait),
4031 static const struct opcode group7_rm2[] = {
4033 II(ImplicitOps | Priv, em_xsetbv, xsetbv),
4037 static const struct opcode group7_rm3[] = {
4038 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
4039 II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall),
4040 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
4041 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
4042 DIP(SrcNone | Prot | Priv, stgi, check_svme),
4043 DIP(SrcNone | Prot | Priv, clgi, check_svme),
4044 DIP(SrcNone | Prot | Priv, skinit, check_svme),
4045 DIP(SrcNone | Prot | Priv, invlpga, check_svme),
4048 static const struct opcode group7_rm7[] = {
4050 DIP(SrcNone, rdtscp, check_rdtsc),
4054 static const struct opcode group1[] = {
4056 F(Lock | PageTable, em_or),
4059 F(Lock | PageTable, em_and),
4065 static const struct opcode group1A[] = {
4066 I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N,
4069 static const struct opcode group2[] = {
4070 F(DstMem | ModRM, em_rol),
4071 F(DstMem | ModRM, em_ror),
4072 F(DstMem | ModRM, em_rcl),
4073 F(DstMem | ModRM, em_rcr),
4074 F(DstMem | ModRM, em_shl),
4075 F(DstMem | ModRM, em_shr),
4076 F(DstMem | ModRM, em_shl),
4077 F(DstMem | ModRM, em_sar),
4080 static const struct opcode group3[] = {
4081 F(DstMem | SrcImm | NoWrite, em_test),
4082 F(DstMem | SrcImm | NoWrite, em_test),
4083 F(DstMem | SrcNone | Lock, em_not),
4084 F(DstMem | SrcNone | Lock, em_neg),
4085 F(DstXacc | Src2Mem, em_mul_ex),
4086 F(DstXacc | Src2Mem, em_imul_ex),
4087 F(DstXacc | Src2Mem, em_div_ex),
4088 F(DstXacc | Src2Mem, em_idiv_ex),
4091 static const struct opcode group4[] = {
4092 F(ByteOp | DstMem | SrcNone | Lock, em_inc),
4093 F(ByteOp | DstMem | SrcNone | Lock, em_dec),
4097 static const struct opcode group5[] = {
4098 F(DstMem | SrcNone | Lock, em_inc),
4099 F(DstMem | SrcNone | Lock, em_dec),
4100 I(SrcMem | NearBranch | IsBranch, em_call_near_abs),
4101 I(SrcMemFAddr | ImplicitOps | IsBranch, em_call_far),
4102 I(SrcMem | NearBranch | IsBranch, em_jmp_abs),
4103 I(SrcMemFAddr | ImplicitOps | IsBranch, em_jmp_far),
4104 I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined),
4107 static const struct opcode group6[] = {
4108 II(Prot | DstMem, em_sldt, sldt),
4109 II(Prot | DstMem, em_str, str),
4110 II(Prot | Priv | SrcMem16, em_lldt, lldt),
4111 II(Prot | Priv | SrcMem16, em_ltr, ltr),
4115 static const struct group_dual group7 = { {
4116 II(Mov | DstMem, em_sgdt, sgdt),
4117 II(Mov | DstMem, em_sidt, sidt),
4118 II(SrcMem | Priv, em_lgdt, lgdt),
4119 II(SrcMem | Priv, em_lidt, lidt),
4120 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4121 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4122 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
4128 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4129 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4133 static const struct opcode group8[] = {
4135 F(DstMem | SrcImmByte | NoWrite, em_bt),
4136 F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
4137 F(DstMem | SrcImmByte | Lock, em_btr),
4138 F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
4142 * The "memory" destination is actually always a register, since we come
4143 * from the register case of group9.
4145 static const struct gprefix pfx_0f_c7_7 = {
4146 N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
4150 static const struct group_dual group9 = { {
4151 N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
4153 N, N, N, N, N, N, N,
4154 GP(0, &pfx_0f_c7_7),
4157 static const struct opcode group11[] = {
4158 I(DstMem | SrcImm | Mov | PageTable, em_mov),
4162 static const struct gprefix pfx_0f_ae_7 = {
4163 I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N,
4166 static const struct group_dual group15 = { {
4167 I(ModRM | Aligned16, em_fxsave),
4168 I(ModRM | Aligned16, em_fxrstor),
4169 N, N, N, N, N, GP(0, &pfx_0f_ae_7),
4171 N, N, N, N, N, N, N, N,
4174 static const struct gprefix pfx_0f_6f_0f_7f = {
4175 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
4178 static const struct instr_dual instr_dual_0f_2b = {
4182 static const struct gprefix pfx_0f_2b = {
4183 ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
4186 static const struct gprefix pfx_0f_10_0f_11 = {
4187 I(Unaligned, em_mov), I(Unaligned, em_mov), N, N,
4190 static const struct gprefix pfx_0f_28_0f_29 = {
4191 I(Aligned, em_mov), I(Aligned, em_mov), N, N,
4194 static const struct gprefix pfx_0f_e7 = {
4195 N, I(Sse, em_mov), N, N,
4198 static const struct escape escape_d9 = { {
4199 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
4202 N, N, N, N, N, N, N, N,
4204 N, N, N, N, N, N, N, N,
4206 N, N, N, N, N, N, N, N,
4208 N, N, N, N, N, N, N, N,
4210 N, N, N, N, N, N, N, N,
4212 N, N, N, N, N, N, N, N,
4214 N, N, N, N, N, N, N, N,
4216 N, N, N, N, N, N, N, N,
4219 static const struct escape escape_db = { {
4220 N, N, N, N, N, N, N, N,
4223 N, N, N, N, N, N, N, N,
4225 N, N, N, N, N, N, N, N,
4227 N, N, N, N, N, N, N, N,
4229 N, N, N, N, N, N, N, N,
4231 N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
4233 N, N, N, N, N, N, N, N,
4235 N, N, N, N, N, N, N, N,
4237 N, N, N, N, N, N, N, N,
4240 static const struct escape escape_dd = { {
4241 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
4244 N, N, N, N, N, N, N, N,
4246 N, N, N, N, N, N, N, N,
4248 N, N, N, N, N, N, N, N,
4250 N, N, N, N, N, N, N, N,
4252 N, N, N, N, N, N, N, N,
4254 N, N, N, N, N, N, N, N,
4256 N, N, N, N, N, N, N, N,
4258 N, N, N, N, N, N, N, N,
4261 static const struct instr_dual instr_dual_0f_c3 = {
4262 I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
4265 static const struct mode_dual mode_dual_63 = {
4266 N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
4269 static const struct instr_dual instr_dual_8d = {
4270 D(DstReg | SrcMem | ModRM | NoAccess), N
4273 static const struct opcode opcode_table[256] = {
4275 F6ALU(Lock, em_add),
4276 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
4277 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
4279 F6ALU(Lock | PageTable, em_or),
4280 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
4283 F6ALU(Lock, em_adc),
4284 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
4285 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
4287 F6ALU(Lock, em_sbb),
4288 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
4289 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
4291 F6ALU(Lock | PageTable, em_and), N, N,
4293 F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
4295 F6ALU(Lock, em_xor), N, N,
4297 F6ALU(NoWrite, em_cmp), N, N,
4299 X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
4301 X8(I(SrcReg | Stack, em_push)),
4303 X8(I(DstReg | Stack, em_pop)),
4305 I(ImplicitOps | Stack | No64, em_pusha),
4306 I(ImplicitOps | Stack | No64, em_popa),
4307 N, MD(ModRM, &mode_dual_63),
4310 I(SrcImm | Mov | Stack, em_push),
4311 I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
4312 I(SrcImmByte | Mov | Stack, em_push),
4313 I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
4314 I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
4315 I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
4317 X16(D(SrcImmByte | NearBranch | IsBranch)),
4319 G(ByteOp | DstMem | SrcImm, group1),
4320 G(DstMem | SrcImm, group1),
4321 G(ByteOp | DstMem | SrcImm | No64, group1),
4322 G(DstMem | SrcImmByte, group1),
4323 F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
4324 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
4326 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
4327 I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
4328 I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
4329 ID(0, &instr_dual_8d),
4330 I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
4333 DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
4335 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
4336 I(SrcImmFAddr | No64 | IsBranch, em_call_far), N,
4337 II(ImplicitOps | Stack, em_pushf, pushf),
4338 II(ImplicitOps | Stack, em_popf, popf),
4339 I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
4341 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
4342 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
4343 I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
4344 F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
4346 F2bv(DstAcc | SrcImm | NoWrite, em_test),
4347 I2bv(SrcAcc | DstDI | Mov | String, em_mov),
4348 I2bv(SrcSI | DstAcc | Mov | String, em_mov),
4349 F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
4351 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
4353 X8(I(DstReg | SrcImm64 | Mov, em_mov)),
4355 G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
4356 I(ImplicitOps | NearBranch | SrcImmU16 | IsBranch, em_ret_near_imm),
4357 I(ImplicitOps | NearBranch | IsBranch, em_ret),
4358 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
4359 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
4360 G(ByteOp, group11), G(0, group11),
4362 I(Stack | SrcImmU16 | Src2ImmByte | IsBranch, em_enter),
4363 I(Stack | IsBranch, em_leave),
4364 I(ImplicitOps | SrcImmU16 | IsBranch, em_ret_far_imm),
4365 I(ImplicitOps | IsBranch, em_ret_far),
4366 D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch, intn),
4367 D(ImplicitOps | No64 | IsBranch),
4368 II(ImplicitOps | IsBranch, em_iret, iret),
4370 G(Src2One | ByteOp, group2), G(Src2One, group2),
4371 G(Src2CL | ByteOp, group2), G(Src2CL, group2),
4372 I(DstAcc | SrcImmUByte | No64, em_aam),
4373 I(DstAcc | SrcImmUByte | No64, em_aad),
4374 F(DstAcc | ByteOp | No64, em_salc),
4375 I(DstAcc | SrcXLat | ByteOp, em_mov),
4377 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
4379 X3(I(SrcImmByte | NearBranch | IsBranch, em_loop)),
4380 I(SrcImmByte | NearBranch | IsBranch, em_jcxz),
4381 I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
4382 I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
4384 I(SrcImm | NearBranch | IsBranch, em_call),
4385 D(SrcImm | ImplicitOps | NearBranch | IsBranch),
4386 I(SrcImmFAddr | No64 | IsBranch, em_jmp_far),
4387 D(SrcImmByte | ImplicitOps | NearBranch | IsBranch),
4388 I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
4389 I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
4391 N, DI(ImplicitOps, icebp), N, N,
4392 DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
4393 G(ByteOp, group3), G(0, group3),
4395 D(ImplicitOps), D(ImplicitOps),
4396 I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
4397 D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
4400 static const struct opcode twobyte_table[256] = {
4402 G(0, group6), GD(0, &group7), N, N,
4403 N, I(ImplicitOps | EmulateOnUD | IsBranch, em_syscall),
4404 II(ImplicitOps | Priv, em_clts, clts), N,
4405 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
4406 N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4408 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11),
4409 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11),
4411 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 4 * prefetch + 4 * reserved NOP */
4412 D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4413 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4414 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4415 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* 8 * reserved NOP */
4416 D(ImplicitOps | ModRM | SrcMem | NoAccess), /* NOP + 7 * reserved NOP */
4418 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_access),
4419 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
4420 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
4422 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
4425 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
4426 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
4427 N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
4430 II(ImplicitOps | Priv, em_wrmsr, wrmsr),
4431 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
4432 II(ImplicitOps | Priv, em_rdmsr, rdmsr),
4433 IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
4434 I(ImplicitOps | EmulateOnUD | IsBranch, em_sysenter),
4435 I(ImplicitOps | Priv | EmulateOnUD | IsBranch, em_sysexit),
4437 N, N, N, N, N, N, N, N,
4439 X16(D(DstReg | SrcMem | ModRM)),
4441 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4446 N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
4451 N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
4453 X16(D(SrcImm | NearBranch | IsBranch)),
4455 X16(D(ByteOp | DstMem | SrcNone | ModRM | Mov)),
4457 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
4458 II(ImplicitOps, em_cpuid, cpuid),
4459 F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
4460 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
4461 F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
4463 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
4464 II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
4465 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
4466 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
4467 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
4468 GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
4470 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
4471 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
4472 F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
4473 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
4474 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
4475 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4479 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
4480 I(DstReg | SrcMem | ModRM, em_bsf_c),
4481 I(DstReg | SrcMem | ModRM, em_bsr_c),
4482 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4484 F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
4485 N, ID(0, &instr_dual_0f_c3),
4486 N, N, N, GD(0, &group9),
4488 X8(I(DstReg, em_bswap)),
4490 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4492 N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
4493 N, N, N, N, N, N, N, N,
4495 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
4498 static const struct instr_dual instr_dual_0f_38_f0 = {
4499 I(DstReg | SrcMem | Mov, em_movbe), N
4502 static const struct instr_dual instr_dual_0f_38_f1 = {
4503 I(DstMem | SrcReg | Mov, em_movbe), N
4506 static const struct gprefix three_byte_0f_38_f0 = {
4507 ID(0, &instr_dual_0f_38_f0), N, N, N
4510 static const struct gprefix three_byte_0f_38_f1 = {
4511 ID(0, &instr_dual_0f_38_f1), N, N, N
4515 * Insns below are selected by the prefix, which is indexed by the third opcode byte.
4518 static const struct opcode opcode_map_0f_38[256] = {
4520 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4522 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4524 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
4525 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
4546 static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
4550 size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4556 static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
4557 unsigned size, bool sign_extension)
4559 int rc = X86EMUL_CONTINUE;
4563 op->addr.mem.ea = ctxt->_eip;
4564 /* NB. Immediates are sign-extended as necessary. */
4565 switch (op->bytes) {
4567 op->val = insn_fetch(s8, ctxt);
4570 op->val = insn_fetch(s16, ctxt);
4573 op->val = insn_fetch(s32, ctxt);
4576 op->val = insn_fetch(s64, ctxt);
4579 if (!sign_extension) {
4580 switch (op->bytes) {
4588 op->val &= 0xffffffff;
4596 static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4599 int rc = X86EMUL_CONTINUE;
4603 decode_register_operand(ctxt, op);
4606 rc = decode_imm(ctxt, op, 1, false);
4609 ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4613 if (ctxt->d & BitOp)
4614 fetch_bit_operand(ctxt);
4615 op->orig_val = op->val;
4618 ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
4622 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4623 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4624 fetch_register_operand(op);
4625 op->orig_val = op->val;
4629 op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
4630 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
4631 fetch_register_operand(op);
4632 op->orig_val = op->val;
4635 if (ctxt->d & ByteOp) {
4640 op->bytes = ctxt->op_bytes;
4641 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4642 fetch_register_operand(op);
4643 op->orig_val = op->val;
4647 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4649 register_address(ctxt, VCPU_REGS_RDI);
4650 op->addr.mem.seg = VCPU_SREG_ES;
4657 op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
4658 fetch_register_operand(op);
4663 op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
4666 rc = decode_imm(ctxt, op, 1, true);
4674 rc = decode_imm(ctxt, op, imm_size(ctxt), true);
4677 rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
4680 ctxt->memop.bytes = 1;
4681 if (ctxt->memop.type == OP_REG) {
4682 ctxt->memop.addr.reg = decode_register(ctxt,
4683 ctxt->modrm_rm, true);
4684 fetch_register_operand(&ctxt->memop);
4688 ctxt->memop.bytes = 2;
4691 ctxt->memop.bytes = 4;
4694 rc = decode_imm(ctxt, op, 2, false);
4697 rc = decode_imm(ctxt, op, imm_size(ctxt), false);
4701 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4703 register_address(ctxt, VCPU_REGS_RSI);
4704 op->addr.mem.seg = ctxt->seg_override;
4710 op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4713 reg_read(ctxt, VCPU_REGS_RBX) +
4714 (reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
4715 op->addr.mem.seg = ctxt->seg_override;
4720 op->addr.mem.ea = ctxt->_eip;
4721 op->bytes = ctxt->op_bytes + 2;
4722 insn_fetch_arr(op->valptr, op->bytes, ctxt);
4725 ctxt->memop.bytes = ctxt->op_bytes + 2;
4729 op->val = VCPU_SREG_ES;
4733 op->val = VCPU_SREG_CS;
4737 op->val = VCPU_SREG_SS;
4741 op->val = VCPU_SREG_DS;
4745 op->val = VCPU_SREG_FS;
4749 op->val = VCPU_SREG_GS;
4752 /* Special instructions do their own operand decoding. */
4754 op->type = OP_NONE; /* Disable writeback. */
4762 int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type)
4764 int rc = X86EMUL_CONTINUE;
4765 int mode = ctxt->mode;
4766 int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
4767 bool op_prefix = false;
4768 bool has_seg_override = false;
4769 struct opcode opcode;
4771 struct desc_struct desc;
4773 ctxt->memop.type = OP_NONE;
4774 ctxt->memopp = NULL;
4775 ctxt->_eip = ctxt->eip;
4776 ctxt->fetch.ptr = ctxt->fetch.data;
4777 ctxt->fetch.end = ctxt->fetch.data + insn_len;
4778 ctxt->opcode_len = 1;
4779 ctxt->intercept = x86_intercept_none;
4781 memcpy(ctxt->fetch.data, insn, insn_len);
4783 rc = __do_insn_fetch_bytes(ctxt, 1);
4784 if (rc != X86EMUL_CONTINUE)
4788 switch (mode) {
4789 case X86EMUL_MODE_REAL:
4790 case X86EMUL_MODE_VM86:
4791 def_op_bytes = def_ad_bytes = 2;
4792 ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS);
4793 if (desc.d)
4794 def_op_bytes = def_ad_bytes = 4;
4796 case X86EMUL_MODE_PROT16:
4797 def_op_bytes = def_ad_bytes = 2;
4799 case X86EMUL_MODE_PROT32:
4800 def_op_bytes = def_ad_bytes = 4;
4802 #ifdef CONFIG_X86_64
4803 case X86EMUL_MODE_PROT64:
4809 return EMULATION_FAILED;
4812 ctxt->op_bytes = def_op_bytes;
4813 ctxt->ad_bytes = def_ad_bytes;
	/* Legacy prefixes. */
	for (;;) {
		switch (ctxt->b = insn_fetch(u8, ctxt)) {
		case 0x66:	/* operand-size override */
			op_prefix = true;
			/* switch between 2/4 bytes */
			ctxt->op_bytes = def_op_bytes ^ 6;
			break;
		case 0x67:	/* address-size override */
			if (mode == X86EMUL_MODE_PROT64)
				/* switch between 4/8 bytes */
				ctxt->ad_bytes = def_ad_bytes ^ 12;
			else
				/* switch between 2/4 bytes */
				ctxt->ad_bytes = def_ad_bytes ^ 6;
			break;
		case 0x26:	/* ES override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_ES;
			break;
		case 0x2e:	/* CS override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_CS;
			break;
		case 0x36:	/* SS override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_SS;
			break;
		case 0x3e:	/* DS override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_DS;
			break;
		case 0x64:	/* FS override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_FS;
			break;
		case 0x65:	/* GS override */
			has_seg_override = true;
			ctxt->seg_override = VCPU_SREG_GS;
			break;
		case 0x40 ... 0x4f: /* REX */
			if (mode != X86EMUL_MODE_PROT64)
				goto done_prefixes;
			ctxt->rex_prefix = ctxt->b;
			continue;
		case 0xf0:	/* LOCK */
			ctxt->lock_prefix = 1;
			break;
		case 0xf2:	/* REPNE/REPNZ */
		case 0xf3:	/* REP/REPE/REPZ */
			ctxt->rep_prefix = ctxt->b;
			break;
		default:
			goto done_prefixes;
		}

		/* Any legacy prefix after a REX prefix nullifies its effect. */

		ctxt->rex_prefix = 0;
	}

done_prefixes:

	/* REX prefix. */
	if (ctxt->rex_prefix & 8)
		ctxt->op_bytes = 8;	/* REX.W */
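	/*
	 * The XOR toggles above work because each override flips between
	 * exactly two legal sizes: 2 ^ 6 == 4 and 4 ^ 6 == 2 for the
	 * 2/4-byte operand- and address-size cases, and 4 ^ 12 == 8 /
	 * 8 ^ 12 == 4 for the 4/8-byte address sizes in 64-bit mode.
	 */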
	/* Opcode byte(s). */
	opcode = opcode_table[ctxt->b];
	/* Two-byte opcode? */
	if (ctxt->b == 0x0f) {
		ctxt->opcode_len = 2;
		ctxt->b = insn_fetch(u8, ctxt);
		opcode = twobyte_table[ctxt->b];

		/* 0F_38 opcode map */
		if (ctxt->b == 0x38) {
			ctxt->opcode_len = 3;
			ctxt->b = insn_fetch(u8, ctxt);
			opcode = opcode_map_0f_38[ctxt->b];
		}
	}
	ctxt->d = opcode.flags;

	if (ctxt->d & ModRM)
		ctxt->modrm = insn_fetch(u8, ctxt);

	/* vex-prefix instructions are not implemented */
	if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
	    (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
		ctxt->d = NotImpl;
	}
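	/*
	 * In 32-bit modes, 0xc4/0xc5 are the legacy LES/LDS opcodes, which
	 * require a memory operand (ModRM.mod != 3); VEX reuses those bytes
	 * with what would be ModRM.mod == 3, which is how the check above
	 * tells the encodings apart.  In 64-bit mode LES/LDS are invalid,
	 * so 0xc4/0xc5 are always treated as VEX there.
	 */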
	while (ctxt->d & GroupMask) {
		switch (ctxt->d & GroupMask) {
		case Group:
			goffset = (ctxt->modrm >> 3) & 7;
			opcode = opcode.u.group[goffset];
			break;
		case GroupDual:
			goffset = (ctxt->modrm >> 3) & 7;
			if ((ctxt->modrm >> 6) == 3)
				opcode = opcode.u.gdual->mod3[goffset];
			else
				opcode = opcode.u.gdual->mod012[goffset];
			break;
		case RMExt:
			goffset = ctxt->modrm & 7;
			opcode = opcode.u.group[goffset];
			break;
		case Prefix:
			if (ctxt->rep_prefix && op_prefix)
				return EMULATION_FAILED;
			simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
			switch (simd_prefix) {
			case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
			case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
			case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
			case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
			}
			break;
		case Escape:
			if (ctxt->modrm > 0xbf) {
				size_t size = ARRAY_SIZE(opcode.u.esc->high);
				u32 index = array_index_nospec(
					ctxt->modrm - 0xc0, size);

				opcode = opcode.u.esc->high[index];
			} else {
				opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
			}
			break;
		case InstrDual:
			if ((ctxt->modrm >> 6) == 3)
				opcode = opcode.u.idual->mod3;
			else
				opcode = opcode.u.idual->mod012;
			break;
		case ModeDual:
			if (ctxt->mode == X86EMUL_MODE_PROT64)
				opcode = opcode.u.mdual->mode64;
			else
				opcode = opcode.u.mdual->mode32;
			break;
		default:
			return EMULATION_FAILED;
		}

		ctxt->d &= ~(u64)GroupMask;
		ctxt->d |= opcode.flags;
	}
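	/*
	 * Group decodes index the sub-table with ModRM.reg (the "/digit"
	 * in the SDM), RMExt uses ModRM.rm, and Prefix picks an entry by
	 * which SIMD prefix (none/66/F2/F3) was seen.  The loop repeats
	 * because a refined entry may itself carry another group type in
	 * its flags.
	 */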
	ctxt->is_branch = opcode.flags & IsBranch;

	/* Unrecognised? */
	if (ctxt->d == 0)
		return EMULATION_FAILED;

	ctxt->execute = opcode.u.execute;

	if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
	    likely(!(ctxt->d & EmulateOnUD)))
		return EMULATION_FAILED;

	if (unlikely(ctxt->d &
	    (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
	     No16))) {
		/*
		 * These are copied unconditionally here, and checked unconditionally
		 * in x86_emulate_insn.
		 */
		ctxt->check_perm = opcode.check_perm;
		ctxt->intercept = opcode.intercept;

		if (ctxt->d & NotImpl)
			return EMULATION_FAILED;

		if (mode == X86EMUL_MODE_PROT64) {
			if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
				ctxt->op_bytes = 8;
			else if (ctxt->d & NearBranch)
				ctxt->op_bytes = 8;
		}

		if (ctxt->d & Op3264) {
			if (mode == X86EMUL_MODE_PROT64)
				ctxt->op_bytes = 8;
			else
				ctxt->op_bytes = 4;
		}

		if ((ctxt->d & No16) && ctxt->op_bytes == 2)
			ctxt->op_bytes = 4;

		if (ctxt->d & Sse)
			ctxt->op_bytes = 16;
		else if (ctxt->d & Mmx)
			ctxt->op_bytes = 8;
	}
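	/*
	 * Example of the fixups above: in 64-bit mode a push decoded with
	 * the default 4-byte operand size is widened to 8 bytes (Stack),
	 * since 32-bit pushes do not exist there, while a 0x66 prefix
	 * still yields a 16-bit push; near branches are always forced to
	 * 64-bit operands regardless of prefixes.
	 */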
	/* ModRM and SIB bytes. */
	if (ctxt->d & ModRM) {
		rc = decode_modrm(ctxt, &ctxt->memop);
		if (!has_seg_override) {
			has_seg_override = true;
			ctxt->seg_override = ctxt->modrm_seg;
		}
	} else if (ctxt->d & MemAbs)
		rc = decode_abs(ctxt, &ctxt->memop);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	if (!has_seg_override)
		ctxt->seg_override = VCPU_SREG_DS;

	ctxt->memop.addr.mem.seg = ctxt->seg_override;

	/*
	 * Decode and fetch the source operand: register, memory
	 * or immediate.
	 */
	rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	/*
	 * Decode and fetch the second source operand: register, memory
	 * or immediate.
	 */
	rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	/* Decode and fetch the destination operand: register or memory. */
	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);

	if (ctxt->rip_relative && likely(ctxt->memopp))
		ctxt->memopp->addr.mem.ea = address_mask(ctxt,
					ctxt->memopp->addr.mem.ea + ctxt->_eip);

done:
	if (rc == X86EMUL_PROPAGATE_FAULT)
		ctxt->have_exception = true;
	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
}
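/*
 * Expected calling sequence (see x86_emulate_instruction() in x86.c):
 * decode once with x86_decode_insn(), then run x86_emulate_insn(); if
 * the latter returns EMULATION_RESTART, the caller loops straight back
 * into x86_emulate_insn() reusing the decode, which is how
 * multi-iteration string instructions make progress.
 */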
bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
{
	return ctxt->d & PageTable;
}

static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
{
	/* The second termination condition only applies for REPE
	 * and REPNE. Test if the repeat string operation prefix is
	 * REPE/REPZ or REPNE/REPNZ and if it's the case it tests the
	 * corresponding termination condition according to:
	 *	- if REPE/REPZ and ZF = 0 then done
	 *	- if REPNE/REPNZ and ZF = 1 then done
	 */
	if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
	     (ctxt->b == 0xae) || (ctxt->b == 0xaf))
	    && (((ctxt->rep_prefix == REPE_PREFIX) &&
		 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
		|| ((ctxt->rep_prefix == REPNE_PREFIX) &&
		    ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
		return true;

	return false;
}
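/*
 * Example: "repe cmpsb" (0xf3 0xa6) compares one byte per iteration and
 * also terminates on the first mismatch, because REPE requires ZF to
 * remain set.  0xa6/0xa7 are CMPS and 0xae/0xaf are SCAS, the only
 * string opcodes that consult ZF.
 */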
static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
{
	int rc;

	kvm_fpu_get();
	rc = asm_safe("fwait");
	kvm_fpu_put();

	if (unlikely(rc != X86EMUL_CONTINUE))
		return emulate_exception(ctxt, MF_VECTOR, 0, false);

	return X86EMUL_CONTINUE;
}

static void fetch_possible_mmx_operand(struct operand *op)
{
	if (op->type == OP_MM)
		kvm_read_mmx_reg(op->addr.mm, &op->mm_val);
}
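/*
 * OP_MM operands alias the x87 register stack, so their values may only
 * be read after flush_pending_x87_faults() has shown (via fwait) that
 * no #MF is pending; x86_emulate_insn() below preserves that ordering.
 */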
static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
{
	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;

	if (!(ctxt->d & ByteOp))
		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;

	asm("push %[flags]; popf; " CALL_NOSPEC " ; pushf; pop %[flags]\n"
	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
	      [thunk_target]"+S"(fop), ASM_CALL_CONSTRAINT
	    : "c"(ctxt->src2.val));

	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
	if (!fop) /* exception is returned in fop variable */
		return emulate_de(ctxt);
	return X86EMUL_CONTINUE;
}
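/*
 * fastop() calling convention, for reference: dst.val travels in RAX,
 * src.val in RDX, src2.val in RCX, and the guest's arithmetic flags are
 * installed around the indirect call.  The stubs sit FASTOP_SIZE bytes
 * apart, one per operand size, so __ffs(bytes) (0/1/2/3 for 1/2/4/8-byte
 * operands) selects the right entry.  A stub that faults (e.g. a
 * division) has an exception fixup that zeroes the call-target register,
 * which is why reading back a NULL fop above signals #DE.
 */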
void init_decode_cache(struct x86_emulate_ctxt *ctxt)
{
	/* Clear fields that are set conditionally but read without a guard. */
	ctxt->rip_relative = false;
	ctxt->rex_prefix = 0;
	ctxt->lock_prefix = 0;
	ctxt->rep_prefix = 0;
	ctxt->regs_valid = 0;
	ctxt->regs_dirty = 0;

	ctxt->io_read.pos = 0;
	ctxt->io_read.end = 0;
	ctxt->mem_read.end = 0;
}
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	int rc = X86EMUL_CONTINUE;
	int saved_dst_type = ctxt->dst.type;
	bool is_guest_mode = ctxt->ops->is_guest_mode(ctxt);

	ctxt->mem_read.pos = 0;

	/* LOCK prefix is allowed only with some instructions */
	if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	if (unlikely(ctxt->d &
		     (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
		if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
				(ctxt->d & Undefined)) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
		    || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
			rc = emulate_nm(ctxt);
			goto done;
		}

		if (ctxt->d & Mmx) {
			rc = flush_pending_x87_faults(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			/*
			 * Now that we know the fpu is exception safe, we can fetch
			 * operands from it.
			 */
			fetch_possible_mmx_operand(&ctxt->src);
			fetch_possible_mmx_operand(&ctxt->src2);
			if (!(ctxt->d & Mov))
				fetch_possible_mmx_operand(&ctxt->dst);
		}
		if (unlikely(is_guest_mode) && ctxt->intercept) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_PRE_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		/* Instruction can only be executed in protected mode */
		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		/* Privileged instruction can be executed only in CPL=0 */
		if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
			if (ctxt->d & PrivUD)
				rc = emulate_ud(ctxt);
			else
				rc = emulate_gp(ctxt, 0);
			goto done;
		}

		/* Do instruction specific permission checks */
		if (ctxt->d & CheckPerm) {
			rc = ctxt->check_perm(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (unlikely(is_guest_mode) && (ctxt->d & Intercept)) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_POST_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (ctxt->rep_prefix && (ctxt->d & String)) {
			/* All REP prefixes have the same first termination condition */
			if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
				string_registers_quirk(ctxt);
				ctxt->eip = ctxt->_eip;
				ctxt->eflags &= ~X86_EFLAGS_RF;
				goto done;
			}
		}
	}
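	/*
	 * Example of the RCX == 0 fast path above: "rep movsb" with a
	 * zero (masked) count register retires immediately with no memory
	 * access.  address_mask() applies the current address size, so
	 * 16-bit code is judged on CX only and 32-bit code on ECX.
	 */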
	if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
		rc = segmented_read(ctxt, ctxt->src.addr.mem,
				    ctxt->src.valptr, ctxt->src.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		ctxt->src.orig_val64 = ctxt->src.val64;
	}

	if (ctxt->src2.type == OP_MEM) {
		rc = segmented_read(ctxt, ctxt->src2.addr.mem,
				    &ctxt->src2.val, ctxt->src2.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if ((ctxt->d & DstMask) == ImplicitOps)
		goto special_insn;

	if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
		/* optimisation - avoid slow emulated read if Mov */
		rc = segmented_read(ctxt, ctxt->dst.addr.mem,
				   &ctxt->dst.val, ctxt->dst.bytes);
		if (rc != X86EMUL_CONTINUE) {
			if (!(ctxt->d & NoWrite) &&
			    rc == X86EMUL_PROPAGATE_FAULT &&
			    ctxt->exception.vector == PF_VECTOR)
				ctxt->exception.error_code |= PFERR_WRITE_MASK;
			goto done;
		}
	}
	/* Copy full 64-bit value for CMPXCHG8B. */
	ctxt->dst.orig_val64 = ctxt->dst.val64;
special_insn:

	if (unlikely(is_guest_mode) && (ctxt->d & Intercept)) {
		rc = emulator_check_intercept(ctxt, ctxt->intercept,
					      X86_ICPT_POST_MEMACCESS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if (ctxt->rep_prefix && (ctxt->d & String))
		ctxt->eflags |= X86_EFLAGS_RF;
	else
		ctxt->eflags &= ~X86_EFLAGS_RF;

	if (ctxt->execute) {
		if (ctxt->d & Fastop)
			rc = fastop(ctxt, ctxt->fop);
		else
			rc = ctxt->execute(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		goto writeback;
	}

	if (ctxt->opcode_len == 2)
		goto twobyte_insn;
	else if (ctxt->opcode_len == 3)
		goto threebyte_insn;
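	/*
	 * Only opcodes without an ->execute handler reach the switches
	 * below; these are the few one- and two-byte instructions still
	 * emulated inline.
	 */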
	switch (ctxt->b) {
	case 0x70 ... 0x7f: /* jcc (short) */
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x8d: /* lea r16/r32, m */
		ctxt->dst.val = ctxt->src.addr.mem.ea;
		break;
	case 0x90 ... 0x97: /* nop / xchg reg, rax */
		if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
			ctxt->dst.type = OP_NONE;
		else
			rc = em_xchg(ctxt);
		break;
	case 0x98: /* cbw/cwde/cdqe */
		switch (ctxt->op_bytes) {
		case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
		case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
		case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
		}
		break;
	case 0xcc: /* int3 */
		rc = emulate_int(ctxt, 3);
		break;
	case 0xcd: /* int n */
		rc = emulate_int(ctxt, ctxt->src.val);
		break;
	case 0xce: /* into */
		if (ctxt->eflags & X86_EFLAGS_OF)
			rc = emulate_int(ctxt, 4);
		break;
	case 0xe9: /* jmp rel */
	case 0xeb: /* jmp rel short */
		rc = jmp_rel(ctxt, ctxt->src.val);
		ctxt->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xf4: /* hlt */
		ctxt->ops->halt(ctxt);
		break;
	case 0xf5: /* cmc */
		/* complement carry flag from eflags reg */
		ctxt->eflags ^= X86_EFLAGS_CF;
		break;
	case 0xf8: /* clc */
		ctxt->eflags &= ~X86_EFLAGS_CF;
		break;
	case 0xf9: /* stc */
		ctxt->eflags |= X86_EFLAGS_CF;
		break;
	case 0xfc: /* cld */
		ctxt->eflags &= ~X86_EFLAGS_DF;
		break;
	case 0xfd: /* std */
		ctxt->eflags |= X86_EFLAGS_DF;
		break;
	default:
		goto cannot_emulate;
	}
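	/*
	 * Example for the 0x98 case above: "cwde" (op_bytes == 4) with
	 * AX = 0x8000 sign-extends the s16 value to EAX = 0xffff8000;
	 * with REX.W (op_bytes == 8), "cdqe" sign-extends EAX into RAX
	 * instead.
	 */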
	if (rc != X86EMUL_CONTINUE)
		goto done;

writeback:
	if (ctxt->d & SrcWrite) {
		BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
		rc = writeback(ctxt, &ctxt->src);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}
	if (!(ctxt->d & NoWrite)) {
		rc = writeback(ctxt, &ctxt->dst);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	/*
	 * restore dst type in case the decoding will be reused
	 * (happens for string instructions)
	 */
	ctxt->dst.type = saved_dst_type;

	if ((ctxt->d & SrcMask) == SrcSI)
		string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);

	if ((ctxt->d & DstMask) == DstDI)
		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
	if (ctxt->rep_prefix && (ctxt->d & String)) {
		unsigned int count;
		struct read_cache *r = &ctxt->io_read;
		if ((ctxt->d & SrcMask) == SrcSI)
			count = ctxt->src.count;
		else
			count = ctxt->dst.count;
		register_address_increment(ctxt, VCPU_REGS_RCX, -count);

		if (!string_insn_completed(ctxt)) {
			/*
			 * Re-enter guest when pio read ahead buffer is empty
			 * or, if it is not used, after each 1024 iterations.
			 */
			if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
			    (r->end == 0 || r->end != r->pos)) {
				/*
				 * Reset read cache. Usually happens before
				 * decode, but since instruction is restarted
				 * we have to do it here.
				 */
				ctxt->mem_read.end = 0;
				writeback_registers(ctxt);
				return EMULATION_RESTART;
			}
			goto done; /* skip rip writeback */
		}
		ctxt->eflags &= ~X86_EFLAGS_RF;
	}
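	/*
	 * The 0x3ff test above bounds how long emulation stays in-kernel:
	 * EMULATION_RESTART keeps iterating inside KVM, while falling
	 * through to "goto done" without advancing RIP makes the guest
	 * re-execute the instruction, giving pending events a chance to
	 * be injected at least every 1024 iterations.
	 */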
	ctxt->eip = ctxt->_eip;
	if (ctxt->mode != X86EMUL_MODE_PROT64)
		ctxt->eip = (u32)ctxt->_eip;

done:
	if (rc == X86EMUL_PROPAGATE_FAULT) {
		if (KVM_EMULATOR_BUG_ON(ctxt->exception.vector > 0x1f, ctxt))
			return EMULATION_FAILED;
		ctxt->have_exception = true;
	}
	if (rc == X86EMUL_INTERCEPTED)
		return EMULATION_INTERCEPTED;

	if (rc == X86EMUL_CONTINUE)
		writeback_registers(ctxt);

	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
twobyte_insn:
	switch (ctxt->b) {
	case 0x09: /* wbinvd */
		(ctxt->ops->wbinvd)(ctxt);
		break;
	case 0x08: /* invd */
	case 0x0d: /* GrpP (prefetch) */
	case 0x18: /* Grp16 (prefetch/nop) */
	case 0x1f: /* nop */
		break;
	case 0x20: /* mov cr, reg */
		ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
		break;
	case 0x21: /* mov from dr to reg */
		ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
		break;
	case 0x40 ... 0x4f: /* cmov */
		if (test_cc(ctxt->b, ctxt->eflags))
			ctxt->dst.val = ctxt->src.val;
		else if (ctxt->op_bytes != 4)
			ctxt->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x80 ... 0x8f: /* jnz rel, etc */
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x90 ... 0x9f: /* setcc r/m8 */
		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
		break;
	case 0xb6 ... 0xb7: /* movzx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
						       : (u16) ctxt->src.val;
		break;
	case 0xbe ... 0xbf: /* movsx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
							(s16) ctxt->src.val;
		break;
	default:
		goto cannot_emulate;
	}

threebyte_insn:

	if (rc != X86EMUL_CONTINUE)
		goto done;

	goto writeback;

cannot_emulate:
	return EMULATION_FAILED;
}
void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
{
	invalidate_registers(ctxt);
}

void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
{
	writeback_registers(ctxt);
}

bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->rep_prefix && (ctxt->d & String))
		return false;

	if (ctxt->d & TwoMemOp)
		return false;

	return true;
}