1 /******************************************************************************
4 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
6 * Copyright (c) 2005 Keir Fraser
8 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
9 * privileged instructions:
11 * Copyright (C) 2006 Qumranet
13 * Avi Kivity <avi@qumranet.com>
14 * Yaniv Kamay <yaniv@qumranet.com>
16 * This work is licensed under the terms of the GNU GPL, version 2. See
17 * the COPYING file in the top-level directory.
19 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
25 #include <public/xen.h>
26 #define DPRINTF(_f, _a ...) printf(_f , ## _a)
28 #include <linux/kvm_host.h>
29 #include "kvm_cache_regs.h"
30 #define DPRINTF(x...) do {} while (0)
32 #include <linux/module.h>
33 #include <asm/kvm_emulate.h>
39 * Opcode effective-address decode tables.
40 * Note that we only emulate instructions that have at least one memory
41 * operand (excluding implicit stack references). We assume that stack
42 * references and instruction fetches will never occur in special memory
43 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
44 * not be handled.
47 /* Operand sizes: 8-bit operands or specified/overridden size. */
48 #define ByteOp (1<<0) /* 8-bit operands. */
49 /* Destination operand type. */
50 #define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */
51 #define DstReg (2<<1) /* Register operand. */
52 #define DstMem (3<<1) /* Memory operand. */
53 #define DstAcc (4<<1) /* Destination Accumulator */
54 #define DstDI (5<<1) /* Destination is in ES:(E)DI */
55 #define DstMem64 (6<<1) /* 64bit memory operand */
56 #define DstMask (7<<1)
57 /* Source operand type. */
58 #define SrcNone (0<<4) /* No source operand. */
59 #define SrcImplicit (0<<4) /* Source operand is implicit in the opcode. */
60 #define SrcReg (1<<4) /* Register operand. */
61 #define SrcMem (2<<4) /* Memory operand. */
62 #define SrcMem16 (3<<4) /* Memory operand (16-bit). */
63 #define SrcMem32 (4<<4) /* Memory operand (32-bit). */
64 #define SrcImm (5<<4) /* Immediate operand. */
65 #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */
66 #define SrcOne (7<<4) /* Implied '1' */
67 #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */
68 #define SrcImmU (9<<4) /* Immediate operand, unsigned */
69 #define SrcSI (0xa<<4) /* Source is in the DS:RSI */
70 #define SrcImmFAddr (0xb<<4) /* Source is immediate far address */
71 #define SrcMemFAddr (0xc<<4) /* Source is far address in memory */
72 #define SrcMask (0xf<<4)
73 /* Generic ModRM decode. */
75 /* Destination is only written; never read. */
78 #define MemAbs (1<<11) /* Memory operand is absolute displacement */
79 #define String (1<<12) /* String instruction (rep capable) */
80 #define Stack (1<<13) /* Stack instruction (push/pop) */
81 #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
82 #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
83 #define GroupMask 0xff /* Group number stored in bits 0:7 */
85 #define Lock (1<<26) /* lock prefix is allowed for the instruction */
86 #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
88 /* Source 2 operand type */
89 #define Src2None (0<<29)
90 #define Src2CL (1<<29)
91 #define Src2ImmByte (2<<29)
92 #define Src2One (3<<29)
93 #define Src2Mask (7<<29)
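/*
 * Editorial sketch, not part of the original file: how a decode-table
 * entry is taken apart with the masks above. Opcode 0x88 (mov r/m8, r8)
 * is encoded below as ByteOp | DstMem | SrcReg | ModRM | Mov, so this
 * hypothetical helper would see DstMem, SrcReg, Src2None and byte size.
 */
static inline void example_split_flags(u32 d)
{
	u32 dst = d & DstMask;		/* destination operand class */
	u32 src = d & SrcMask;		/* source operand class */
	u32 src2 = d & Src2Mask;	/* second-source operand class */
	int byteop = !!(d & ByteOp);	/* 1 => 8-bit operands */
	(void)dst; (void)src; (void)src2; (void)byteop;
}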
96 Group1_80, Group1_81, Group1_82, Group1_83,
97 Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
101 static u32 opcode_table[256] = {
103 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
104 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
105 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
106 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
108 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
109 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
110 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
111 ImplicitOps | Stack | No64, 0,
113 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
114 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
115 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
116 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
118 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
119 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
120 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
121 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
123 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
124 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
125 DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
127 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
128 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
131 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
132 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
135 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
136 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
137 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
140 DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
142 DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
144 SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
145 SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
147 DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
148 DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
150 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
151 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
154 SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0,
155 DstDI | ByteOp | Mov | String, DstDI | Mov | String, /* insb, insw/insd */
156 SrcSI | ByteOp | ImplicitOps | String, SrcSI | ImplicitOps | String, /* outsb, outsw/outsd */
158 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
159 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
161 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
162 SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
164 Group | Group1_80, Group | Group1_81,
165 Group | Group1_82, Group | Group1_83,
166 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
167 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
169 ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
170 ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
171 DstMem | SrcReg | ModRM | Mov, ModRM | DstReg,
172 ImplicitOps | SrcMem | ModRM, Group | Group1A,
174 DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
176 0, 0, SrcImmFAddr | No64, 0,
177 ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
179 ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
180 ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs,
181 ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String,
182 ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String,
184 0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String,
185 ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String,
186 ByteOp | DstDI | String, DstDI | String,
188 ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
189 ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
190 ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
191 ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
193 DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
194 DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
195 DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
196 DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
198 ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
199 0, ImplicitOps | Stack, 0, 0,
200 ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
202 0, 0, 0, ImplicitOps | Stack,
203 ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps,
205 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
206 ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
209 0, 0, 0, 0, 0, 0, 0, 0,
212 ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc,
213 ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc,
215 SrcImm | Stack, SrcImm | ImplicitOps,
216 SrcImmFAddr | No64, SrcImmByte | ImplicitOps,
217 SrcNone | ByteOp | DstAcc, SrcNone | DstAcc,
218 SrcNone | ByteOp | DstAcc, SrcNone | DstAcc,
221 ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3,
223 ImplicitOps, 0, ImplicitOps, ImplicitOps,
224 ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
227 static u32 twobyte_table[256] = {
229 0, Group | GroupDual | Group7, 0, 0,
230 0, ImplicitOps, ImplicitOps | Priv, 0,
231 ImplicitOps | Priv, ImplicitOps | Priv, 0, 0,
232 0, ImplicitOps | ModRM, 0, 0,
234 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
236 ModRM | ImplicitOps | Priv, ModRM | Priv,
237 ModRM | ImplicitOps | Priv, ModRM | Priv,
239 0, 0, 0, 0, 0, 0, 0, 0,
241 ImplicitOps | Priv, 0, ImplicitOps | Priv, 0,
242 ImplicitOps, ImplicitOps | Priv, 0, 0,
243 0, 0, 0, 0, 0, 0, 0, 0,
245 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
246 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
247 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
248 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
250 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
251 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
252 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
253 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
255 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
257 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
259 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
261 SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
262 SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
264 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
266 ImplicitOps | Stack, ImplicitOps | Stack,
267 0, DstMem | SrcReg | ModRM | BitOp,
268 DstMem | SrcReg | Src2ImmByte | ModRM,
269 DstMem | SrcReg | Src2CL | ModRM, 0, 0,
271 ImplicitOps | Stack, ImplicitOps | Stack,
272 0, DstMem | SrcReg | ModRM | BitOp | Lock,
273 DstMem | SrcReg | Src2ImmByte | ModRM,
274 DstMem | SrcReg | Src2CL | ModRM,
277 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
278 0, DstMem | SrcReg | ModRM | BitOp | Lock,
279 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
280 DstReg | SrcMem16 | ModRM | Mov,
283 Group | Group8, DstMem | SrcReg | ModRM | BitOp | Lock,
284 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
285 DstReg | SrcMem16 | ModRM | Mov,
287 0, 0, 0, DstMem | SrcReg | ModRM | Mov,
288 0, 0, 0, Group | GroupDual | Group9,
289 0, 0, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
293 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
295 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
298 static u32 group_table[] = {
300 ByteOp | DstMem | SrcImm | ModRM | Lock,
301 ByteOp | DstMem | SrcImm | ModRM | Lock,
302 ByteOp | DstMem | SrcImm | ModRM | Lock,
303 ByteOp | DstMem | SrcImm | ModRM | Lock,
304 ByteOp | DstMem | SrcImm | ModRM | Lock,
305 ByteOp | DstMem | SrcImm | ModRM | Lock,
306 ByteOp | DstMem | SrcImm | ModRM | Lock,
307 ByteOp | DstMem | SrcImm | ModRM,
309 DstMem | SrcImm | ModRM | Lock,
310 DstMem | SrcImm | ModRM | Lock,
311 DstMem | SrcImm | ModRM | Lock,
312 DstMem | SrcImm | ModRM | Lock,
313 DstMem | SrcImm | ModRM | Lock,
314 DstMem | SrcImm | ModRM | Lock,
315 DstMem | SrcImm | ModRM | Lock,
316 DstMem | SrcImm | ModRM,
318 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
319 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
320 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
321 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
322 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
323 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
324 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
325 ByteOp | DstMem | SrcImm | ModRM | No64,
327 DstMem | SrcImmByte | ModRM | Lock,
328 DstMem | SrcImmByte | ModRM | Lock,
329 DstMem | SrcImmByte | ModRM | Lock,
330 DstMem | SrcImmByte | ModRM | Lock,
331 DstMem | SrcImmByte | ModRM | Lock,
332 DstMem | SrcImmByte | ModRM | Lock,
333 DstMem | SrcImmByte | ModRM | Lock,
334 DstMem | SrcImmByte | ModRM,
336 DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
338 ByteOp | SrcImm | DstMem | ModRM, 0,
339 ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
342 DstMem | SrcImm | ModRM, 0,
343 DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
346 ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
349 DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
350 SrcMem | ModRM | Stack, 0,
351 SrcMem | ModRM | Stack, SrcMemFAddr | ModRM | ImplicitOps,
352 SrcMem | ModRM | Stack, 0,
354 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv,
355 SrcNone | ModRM | DstMem | Mov, 0,
356 SrcMem16 | ModRM | Mov | Priv, SrcMem | ModRM | ByteOp | Priv,
359 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock,
360 DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock,
362 0, DstMem64 | ModRM | Lock, 0, 0, 0, 0, 0, 0,
365 static u32 group2_table[] = {
367 SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM | Priv,
368 SrcNone | ModRM | DstMem | Mov, 0,
369 SrcMem16 | ModRM | Mov | Priv, 0,
371 0, 0, 0, 0, 0, 0, 0, 0,
374 /* EFLAGS bit definitions. */
375 #define EFLG_ID (1<<21)
376 #define EFLG_VIP (1<<20)
377 #define EFLG_VIF (1<<19)
378 #define EFLG_AC (1<<18)
379 #define EFLG_VM (1<<17)
380 #define EFLG_RF (1<<16)
381 #define EFLG_IOPL (3<<12)
382 #define EFLG_NT (1<<14)
383 #define EFLG_OF (1<<11)
384 #define EFLG_DF (1<<10)
385 #define EFLG_IF (1<<9)
386 #define EFLG_TF (1<<8)
387 #define EFLG_SF (1<<7)
388 #define EFLG_ZF (1<<6)
389 #define EFLG_AF (1<<4)
390 #define EFLG_PF (1<<2)
391 #define EFLG_CF (1<<0)
394 * Instruction emulation:
395 * Most instructions are emulated directly via a fragment of inline assembly
396 * code. This allows us to save/restore EFLAGS and thus very easily pick up
397 * any modified flags.
400 #if defined(CONFIG_X86_64)
401 #define _LO32 "k" /* force 32-bit operand */
402 #define _STK "%%rsp" /* stack pointer */
403 #elif defined(__i386__)
404 #define _LO32 "" /* force 32-bit operand */
405 #define _STK "%%esp" /* stack pointer */
409 * These EFLAGS bits are restored from saved value during emulation, and
410 * any changes are written back to the saved value after emulation.
412 #define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
414 /* Before executing instruction: restore necessary bits in EFLAGS. */
415 #define _PRE_EFLAGS(_sav, _msk, _tmp) \
416 /* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
417 "movl %"_sav",%"_LO32 _tmp"; " \
420 "movl %"_msk",%"_LO32 _tmp"; " \
421 "andl %"_LO32 _tmp",("_STK"); " \
423 "notl %"_LO32 _tmp"; " \
424 "andl %"_LO32 _tmp",("_STK"); " \
425 "andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); " \
427 "orl %"_LO32 _tmp",("_STK"); " \
431 /* After executing instruction: write-back necessary bits in EFLAGS. */
432 #define _POST_EFLAGS(_sav, _msk, _tmp) \
433 /* _sav |= EFLAGS & _msk; */ \
436 "andl %"_msk",%"_LO32 _tmp"; " \
437 "orl %"_LO32 _tmp",%"_sav"; "
445 #define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix) \
447 __asm__ __volatile__ ( \
448 _PRE_EFLAGS("0", "4", "2") \
449 _op _suffix " %"_x"3,%1; " \
450 _POST_EFLAGS("0", "4", "2") \
451 : "=m" (_eflags), "=m" ((_dst).val), \
453 : _y ((_src).val), "i" (EFLAGS_MASK)); \
457 /* Raw emulation: instruction has two explicit operands. */
458 #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
460 unsigned long _tmp; \
462 switch ((_dst).bytes) { \
464 ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
467 ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
470 ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
475 #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
477 unsigned long _tmp; \
478 switch ((_dst).bytes) { \
480 ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \
483 __emulate_2op_nobyte(_op, _src, _dst, _eflags, \
484 _wx, _wy, _lx, _ly, _qx, _qy); \
489 /* Source operand is byte-sized and may be restricted to just %cl. */
490 #define emulate_2op_SrcB(_op, _src, _dst, _eflags) \
491 __emulate_2op(_op, _src, _dst, _eflags, \
492 "b", "c", "b", "c", "b", "c", "b", "c")
494 /* Source operand is byte, word, long or quad sized. */
495 #define emulate_2op_SrcV(_op, _src, _dst, _eflags) \
496 __emulate_2op(_op, _src, _dst, _eflags, \
497 "b", "q", "w", "r", _LO32, "r", "", "r")
499 /* Source operand is word, long or quad sized. */
500 #define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags) \
501 __emulate_2op_nobyte(_op, _src, _dst, _eflags, \
502 "w", "r", _LO32, "r", "", "r")
504 /* Instruction has three operands and one operand is stored in ECX register */
505 #define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \
507 unsigned long _tmp; \
508 _type _clv = (_cl).val; \
509 _type _srcv = (_src).val; \
510 _type _dstv = (_dst).val; \
512 __asm__ __volatile__ ( \
513 _PRE_EFLAGS("0", "5", "2") \
514 _op _suffix " %4,%1 \n" \
515 _POST_EFLAGS("0", "5", "2") \
516 : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \
517 : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \
520 (_cl).val = (unsigned long) _clv; \
521 (_src).val = (unsigned long) _srcv; \
522 (_dst).val = (unsigned long) _dstv; \
525 #define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \
527 switch ((_dst).bytes) { \
529 __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
530 "w", unsigned short); \
533 __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
534 "l", unsigned int); \
537 ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
538 "q", unsigned long)); \
543 #define __emulate_1op(_op, _dst, _eflags, _suffix) \
545 unsigned long _tmp; \
547 __asm__ __volatile__ ( \
548 _PRE_EFLAGS("0", "3", "2") \
549 _op _suffix " %1; " \
550 _POST_EFLAGS("0", "3", "2") \
551 : "=m" (_eflags), "+m" ((_dst).val), \
553 : "i" (EFLAGS_MASK)); \
556 /* Instruction has only one explicit operand (no source operand). */
557 #define emulate_1op(_op, _dst, _eflags) \
559 switch ((_dst).bytes) { \
560 case 1: __emulate_1op(_op, _dst, _eflags, "b"); break; \
561 case 2: __emulate_1op(_op, _dst, _eflags, "w"); break; \
562 case 4: __emulate_1op(_op, _dst, _eflags, "l"); break; \
563 case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
567 /* Fetch next part of the instruction being emulated. */
568 #define insn_fetch(_type, _size, _eip) \
569 ({ unsigned long _x; \
570 rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \
571 if (rc != X86EMUL_CONTINUE) \
577 #define insn_fetch_arr(_arr, _size, _eip) \
578 ({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \
579 if (rc != X86EMUL_CONTINUE) \
584 static inline unsigned long ad_mask(struct decode_cache *c)
586 return (1UL << (c->ad_bytes << 3)) - 1;
589 /* Access/update address held in a register, based on addressing mode. */
590 static inline unsigned long
591 address_mask(struct decode_cache *c, unsigned long reg)
593 if (c->ad_bytes == sizeof(unsigned long))
596 return reg & ad_mask(c);
599 static inline unsigned long
600 register_address(struct decode_cache *c, unsigned long base, unsigned long reg)
602 return base + address_mask(c, reg);
606 register_address_increment(struct decode_cache *c, unsigned long *reg, int inc)
608 if (c->ad_bytes == sizeof(unsigned long))
611 *reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c));
614 static inline void jmp_rel(struct decode_cache *c, int rel)
616 register_address_increment(c, &c->eip, rel);
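/*
 * Worked example (editorial): with a 16-bit address size (ad_bytes == 2)
 * ad_mask() is 0xffff, so register_address_increment() wraps within the
 * low 16 bits and leaves the upper bits alone:
 *
 *	reg = 0x1234fffe, inc = 4  =>  reg = 0x12340002
 */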
619 static void set_seg_override(struct decode_cache *c, int seg)
621 c->has_seg_override = true;
622 c->seg_override = seg;
625 static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
627 if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
630 return kvm_x86_ops->get_segment_base(ctxt->vcpu, seg);
633 static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt,
634 struct decode_cache *c)
636 if (!c->has_seg_override)
639 return seg_base(ctxt, c->seg_override);
642 static unsigned long es_base(struct x86_emulate_ctxt *ctxt)
644 return seg_base(ctxt, VCPU_SREG_ES);
647 static unsigned long ss_base(struct x86_emulate_ctxt *ctxt)
649 return seg_base(ctxt, VCPU_SREG_SS);
652 static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
653 struct x86_emulate_ops *ops,
654 unsigned long eip, u8 *dest)
656 struct fetch_cache *fc = &ctxt->decode.fetch;
660 if (eip == fc->end) {
661 cur_size = fc->end - fc->start;
662 size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip));
663 rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size,
664 size, ctxt->vcpu, NULL);
665 if (rc != X86EMUL_CONTINUE)
669 *dest = fc->data[eip - fc->start];
670 return X86EMUL_CONTINUE;
673 static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
674 struct x86_emulate_ops *ops,
675 unsigned long eip, void *dest, unsigned size)
679 /* x86 instructions are limited to 15 bytes. */
680 if (eip + size - ctxt->eip > 15)
681 return X86EMUL_UNHANDLEABLE;
683 rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
684 if (rc != X86EMUL_CONTINUE)
687 return X86EMUL_CONTINUE;
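/*
 * Editorial example of the length check above: if decode started at
 * ctxt->eip == 0x1000 and 4 bytes are requested at eip == 0x100c, then
 * 0x100c + 4 - 0x1000 == 16 > 15 and the fetch is rejected, matching
 * the architectural 15-byte instruction-length limit.
 */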
691 * Given the 'reg' portion of a ModRM byte, and a register block, return a
692 * pointer into the block that addresses the relevant register.
693 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
695 static void *decode_register(u8 modrm_reg, unsigned long *regs,
700 p = ®s[modrm_reg];
701 if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
702 p = (unsigned char *)&regs[modrm_reg & 3] + 1;
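/*
 * Editorial example: with no REX prefix, modrm_reg == 4 selects AH, not
 * SPL; on little-endian x86 that is byte 1 of the RAX slot, so
 * decode_register(4, c->regs, 1) returns (u8 *)&c->regs[VCPU_REGS_RAX] + 1.
 */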
706 static int read_descriptor(struct x86_emulate_ctxt *ctxt,
707 struct x86_emulate_ops *ops,
709 u16 *size, unsigned long *address, int op_bytes)
716 rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
718 if (rc != X86EMUL_CONTINUE)
720 rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
725 static int test_cc(unsigned int condition, unsigned int flags)
729 switch ((condition & 15) >> 1) {
731 rc |= (flags & EFLG_OF);
733 case 1: /* b/c/nae */
734 rc |= (flags & EFLG_CF);
737 rc |= (flags & EFLG_ZF);
740 rc |= (flags & (EFLG_CF|EFLG_ZF));
743 rc |= (flags & EFLG_SF);
746 rc |= (flags & EFLG_PF);
749 rc |= (flags & EFLG_ZF);
752 rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
756 /* Odd condition identifiers (lsb == 1) have inverted sense. */
757 return (!!rc ^ (condition & 1));
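/*
 * Editorial usage sketch: conditional opcodes feed their low nibble in
 * directly. For jz (opcode 0x74), (0x74 & 15) >> 1 == 2 selects the
 * EFLG_ZF case and the clear lsb keeps the sense, so a short jcc is:
 *
 *	if (test_cc(c->b, ctxt->eflags))
 *		jmp_rel(c, c->src.val);
 */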
760 static void decode_register_operand(struct operand *op,
761 struct decode_cache *c,
764 unsigned reg = c->modrm_reg;
765 int highbyte_regs = c->rex_prefix == 0;
768 reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
770 if ((c->d & ByteOp) && !inhibit_bytereg) {
771 op->ptr = decode_register(reg, c->regs, highbyte_regs);
772 op->val = *(u8 *)op->ptr;
775 op->ptr = decode_register(reg, c->regs, 0);
776 op->bytes = c->op_bytes;
779 op->val = *(u16 *)op->ptr;
782 op->val = *(u32 *)op->ptr;
785 op->val = *(u64 *) op->ptr;
789 op->orig_val = op->val;
792 static int decode_modrm(struct x86_emulate_ctxt *ctxt,
793 struct x86_emulate_ops *ops)
795 struct decode_cache *c = &ctxt->decode;
797 int index_reg = 0, base_reg = 0, scale;
798 int rc = X86EMUL_CONTINUE;
801 c->modrm_reg = (c->rex_prefix & 4) << 1; /* REX.R */
802 index_reg = (c->rex_prefix & 2) << 2; /* REX.X */
803 c->modrm_rm = base_reg = (c->rex_prefix & 1) << 3; /* REX.B */
806 c->modrm = insn_fetch(u8, 1, c->eip);
807 c->modrm_mod |= (c->modrm & 0xc0) >> 6;
808 c->modrm_reg |= (c->modrm & 0x38) >> 3;
809 c->modrm_rm |= (c->modrm & 0x07);
813 if (c->modrm_mod == 3) {
814 c->modrm_ptr = decode_register(c->modrm_rm,
815 c->regs, c->d & ByteOp);
816 c->modrm_val = *(unsigned long *)c->modrm_ptr;
820 if (c->ad_bytes == 2) {
821 unsigned bx = c->regs[VCPU_REGS_RBX];
822 unsigned bp = c->regs[VCPU_REGS_RBP];
823 unsigned si = c->regs[VCPU_REGS_RSI];
824 unsigned di = c->regs[VCPU_REGS_RDI];
826 /* 16-bit ModR/M decode. */
827 switch (c->modrm_mod) {
829 if (c->modrm_rm == 6)
830 c->modrm_ea += insn_fetch(u16, 2, c->eip);
833 c->modrm_ea += insn_fetch(s8, 1, c->eip);
836 c->modrm_ea += insn_fetch(u16, 2, c->eip);
839 switch (c->modrm_rm) {
841 c->modrm_ea += bx + si;
844 c->modrm_ea += bx + di;
847 c->modrm_ea += bp + si;
850 c->modrm_ea += bp + di;
859 if (c->modrm_mod != 0)
866 if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
867 (c->modrm_rm == 6 && c->modrm_mod != 0))
868 if (!c->has_seg_override)
869 set_seg_override(c, VCPU_SREG_SS);
870 c->modrm_ea = (u16)c->modrm_ea;
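/*
 * Worked example (editorial): modrm 0x42 in 16-bit mode has mod == 01,
 * rm == 010, giving modrm_ea = bp + si + disp8; because BP is involved,
 * the default segment becomes SS unless an override prefix was seen.
 */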
872 /* 32/64-bit ModR/M decode. */
873 if ((c->modrm_rm & 7) == 4) {
874 sib = insn_fetch(u8, 1, c->eip);
875 index_reg |= (sib >> 3) & 7;
879 if ((base_reg & 7) == 5 && c->modrm_mod == 0)
880 c->modrm_ea += insn_fetch(s32, 4, c->eip);
882 c->modrm_ea += c->regs[base_reg];
884 c->modrm_ea += c->regs[index_reg] << scale;
885 } else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) {
886 if (ctxt->mode == X86EMUL_MODE_PROT64)
889 c->modrm_ea += c->regs[c->modrm_rm];
890 switch (c->modrm_mod) {
892 if (c->modrm_rm == 5)
893 c->modrm_ea += insn_fetch(s32, 4, c->eip);
896 c->modrm_ea += insn_fetch(s8, 1, c->eip);
899 c->modrm_ea += insn_fetch(s32, 4, c->eip);
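/*
 * Worked example (editorial): modrm 0x04 with sib 0x88 means rm == 100
 * (a SIB byte follows), scale == 2, index == 001, base == 000, so
 * modrm_ea = regs[RAX] + (regs[RCX] << 2); REX.X/REX.B extend the
 * index/base fields into r8-r15.
 */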
907 static int decode_abs(struct x86_emulate_ctxt *ctxt,
908 struct x86_emulate_ops *ops)
910 struct decode_cache *c = &ctxt->decode;
911 int rc = X86EMUL_CONTINUE;
913 switch (c->ad_bytes) {
915 c->modrm_ea = insn_fetch(u16, 2, c->eip);
918 c->modrm_ea = insn_fetch(u32, 4, c->eip);
921 c->modrm_ea = insn_fetch(u64, 8, c->eip);
929 x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
931 struct decode_cache *c = &ctxt->decode;
932 int rc = X86EMUL_CONTINUE;
933 int mode = ctxt->mode;
934 int def_op_bytes, def_ad_bytes, group;
937 /* we cannot decode a new insn before completing the previous rep insn */
938 WARN_ON(ctxt->restart);
940 /* Shadow copy of register state. Committed on successful emulation. */
941 memset(c, 0, sizeof(struct decode_cache));
943 c->fetch.start = c->fetch.end = c->eip;
944 ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
945 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
948 case X86EMUL_MODE_REAL:
949 case X86EMUL_MODE_VM86:
950 case X86EMUL_MODE_PROT16:
951 def_op_bytes = def_ad_bytes = 2;
953 case X86EMUL_MODE_PROT32:
954 def_op_bytes = def_ad_bytes = 4;
957 case X86EMUL_MODE_PROT64:
966 c->op_bytes = def_op_bytes;
967 c->ad_bytes = def_ad_bytes;
969 /* Legacy prefixes. */
971 switch (c->b = insn_fetch(u8, 1, c->eip)) {
972 case 0x66: /* operand-size override */
973 /* switch between 2/4 bytes */
974 c->op_bytes = def_op_bytes ^ 6;
976 case 0x67: /* address-size override */
977 if (mode == X86EMUL_MODE_PROT64)
978 /* switch between 4/8 bytes */
979 c->ad_bytes = def_ad_bytes ^ 12;
981 /* switch between 2/4 bytes */
982 c->ad_bytes = def_ad_bytes ^ 6;
984 case 0x26: /* ES override */
985 case 0x2e: /* CS override */
986 case 0x36: /* SS override */
987 case 0x3e: /* DS override */
988 set_seg_override(c, (c->b >> 3) & 3);
990 case 0x64: /* FS override */
991 case 0x65: /* GS override */
992 set_seg_override(c, c->b & 7);
994 case 0x40 ... 0x4f: /* REX */
995 if (mode != X86EMUL_MODE_PROT64)
997 c->rex_prefix = c->b;
999 case 0xf0: /* LOCK */
1002 case 0xf2: /* REPNE/REPNZ */
1003 c->rep_prefix = REPNE_PREFIX;
1005 case 0xf3: /* REP/REPE/REPZ */
1006 c->rep_prefix = REPE_PREFIX;
1012 /* Any legacy prefix after a REX prefix nullifies its effect. */
1021 if (c->rex_prefix & 8)
1022 c->op_bytes = 8; /* REX.W */
1024 /* Opcode byte(s). */
1025 c->d = opcode_table[c->b];
1027 /* Two-byte opcode? */
1030 c->b = insn_fetch(u8, 1, c->eip);
1031 c->d = twobyte_table[c->b];
1036 group = c->d & GroupMask;
1037 c->modrm = insn_fetch(u8, 1, c->eip);
1040 group = (group << 3) + ((c->modrm >> 3) & 7);
1041 if ((c->d & GroupDual) && (c->modrm >> 6) == 3)
1042 c->d = group2_table[group];
1044 c->d = group_table[group];
1049 DPRINTF("Cannot emulate %02x\n", c->b);
1053 if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
1056 /* ModRM and SIB bytes. */
1058 rc = decode_modrm(ctxt, ops);
1059 else if (c->d & MemAbs)
1060 rc = decode_abs(ctxt, ops);
1061 if (rc != X86EMUL_CONTINUE)
1064 if (!c->has_seg_override)
1065 set_seg_override(c, VCPU_SREG_DS);
1067 if (!(!c->twobyte && c->b == 0x8d))	/* everything but lea */
1068 c->modrm_ea += seg_override_base(ctxt, c);
1070 if (c->ad_bytes != 8)
1071 c->modrm_ea = (u32)c->modrm_ea;
1073 if (c->rip_relative)
1074 c->modrm_ea += c->eip;
1077 * Decode and fetch the source operand: register, memory
1080 switch (c->d & SrcMask) {
1084 decode_register_operand(&c->src, c, 0);
1093 c->src.bytes = (c->d & ByteOp) ? 1 :
1095 /* Don't fetch the address for invlpg: it could be unmapped. */
1096 if (c->twobyte && c->b == 0x01 && c->modrm_reg == 7)
1100 * For instructions with a ModR/M byte, switch to register
1101 * access if Mod = 3.
1103 if ((c->d & ModRM) && c->modrm_mod == 3) {
1104 c->src.type = OP_REG;
1105 c->src.val = c->modrm_val;
1106 c->src.ptr = c->modrm_ptr;
1109 c->src.type = OP_MEM;
1110 c->src.ptr = (unsigned long *)c->modrm_ea;
1115 c->src.type = OP_IMM;
1116 c->src.ptr = (unsigned long *)c->eip;
1117 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1118 if (c->src.bytes == 8)
1120 /* NB. Immediates are sign-extended as necessary. */
1121 switch (c->src.bytes) {
1123 c->src.val = insn_fetch(s8, 1, c->eip);
1126 c->src.val = insn_fetch(s16, 2, c->eip);
1129 c->src.val = insn_fetch(s32, 4, c->eip);
1132 if ((c->d & SrcMask) == SrcImmU) {
1133 switch (c->src.bytes) {
1138 c->src.val &= 0xffff;
1141 c->src.val &= 0xffffffff;
1148 c->src.type = OP_IMM;
1149 c->src.ptr = (unsigned long *)c->eip;
1151 if ((c->d & SrcMask) == SrcImmByte)
1152 c->src.val = insn_fetch(s8, 1, c->eip);
1154 c->src.val = insn_fetch(u8, 1, c->eip);
1161 c->src.type = OP_MEM;
1162 c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1163 c->src.ptr = (unsigned long *)
1164 register_address(c, seg_override_base(ctxt, c),
1165 c->regs[VCPU_REGS_RSI]);
1169 c->src.type = OP_IMM;
1170 c->src.ptr = (unsigned long *)c->eip;
1171 c->src.bytes = c->op_bytes + 2;
1172 insn_fetch_arr(c->src.valptr, c->src.bytes, c->eip);
1175 c->src.type = OP_MEM;
1176 c->src.ptr = (unsigned long *)c->modrm_ea;
1177 c->src.bytes = c->op_bytes + 2;
1182 * Decode and fetch the second source operand: register, memory
1185 switch (c->d & Src2Mask) {
1190 c->src2.val = c->regs[VCPU_REGS_RCX] & 0xff;	/* Src2CL: low byte (CL) of RCX */
1193 c->src2.type = OP_IMM;
1194 c->src2.ptr = (unsigned long *)c->eip;
1196 c->src2.val = insn_fetch(u8, 1, c->eip);
1204 /* Decode and fetch the destination operand: register or memory. */
1205 switch (c->d & DstMask) {
1207 /* Special instructions do their own operand decoding. */
1210 decode_register_operand(&c->dst, c,
1211 c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
1215 if ((c->d & ModRM) && c->modrm_mod == 3) {
1216 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1217 c->dst.type = OP_REG;
1218 c->dst.val = c->dst.orig_val = c->modrm_val;
1219 c->dst.ptr = c->modrm_ptr;
1222 c->dst.type = OP_MEM;
1223 c->dst.ptr = (unsigned long *)c->modrm_ea;
1224 if ((c->d & DstMask) == DstMem64)
1227 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1230 unsigned long mask = ~(c->dst.bytes * 8 - 1);
1232 c->dst.ptr = (void *)c->dst.ptr +
1233 (c->src.val & mask) / 8;
1237 c->dst.type = OP_REG;
1238 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1239 c->dst.ptr = &c->regs[VCPU_REGS_RAX];
1240 switch (c->dst.bytes) {
1242 c->dst.val = *(u8 *)c->dst.ptr;
1245 c->dst.val = *(u16 *)c->dst.ptr;
1248 c->dst.val = *(u32 *)c->dst.ptr;
1251 c->dst.val = *(u64 *)c->dst.ptr;
1254 c->dst.orig_val = c->dst.val;
1257 c->dst.type = OP_MEM;
1258 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1259 c->dst.ptr = (unsigned long *)
1260 register_address(c, es_base(ctxt),
1261 c->regs[VCPU_REGS_RDI]);
1267 return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
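/*
 * Editorial summary of the decode pipeline above: legacy/REX prefixes ->
 * opcode byte(s) -> group-table fixup -> ModRM/SIB or absolute
 * displacement -> source, second-source and destination operands. Every
 * stage consumes bytes only through insn_fetch(), so the 15-byte limit
 * is enforced uniformly.
 */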
1270 static int read_emulated(struct x86_emulate_ctxt *ctxt,
1271 struct x86_emulate_ops *ops,
1272 unsigned long addr, void *dest, unsigned size)
1275 struct read_cache *mc = &ctxt->decode.mem_read;
1278 int n = min(size, 8u);
1280 if (mc->pos < mc->end)
1283 rc = ops->read_emulated(addr, mc->data + mc->end, n, ctxt->vcpu);
1284 if (rc != X86EMUL_CONTINUE)
1289 memcpy(dest, mc->data + mc->pos, n);
1294 return X86EMUL_CONTINUE;
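/*
 * Editorial sketch: the loop above fetches guest memory in chunks of at
 * most 8 bytes and buffers them in mc->data, so e.g. a 16-byte read
 * costs two ops->read_emulated() calls the first time and can be served
 * from the cache if the instruction has to be restarted.
 */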
1297 static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
1298 struct x86_emulate_ops *ops,
1299 unsigned int size, unsigned short port,
1302 struct read_cache *rc = &ctxt->decode.io_read;
1304 if (rc->pos == rc->end) { /* refill pio read ahead */
1305 struct decode_cache *c = &ctxt->decode;
1306 unsigned int in_page, n;
1307 unsigned int count = c->rep_prefix ?
1308 address_mask(c, c->regs[VCPU_REGS_RCX]) : 1;
1309 in_page = (ctxt->eflags & EFLG_DF) ?
1310 offset_in_page(c->regs[VCPU_REGS_RDI]) :
1311 PAGE_SIZE - offset_in_page(c->regs[VCPU_REGS_RDI]);
1312 n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size,
1316 rc->pos = rc->end = 0;
1317 if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu))
1322 memcpy(dest, rc->data + rc->pos, size);
1327 static u32 desc_limit_scaled(struct desc_struct *desc)
1329 u32 limit = get_desc_limit(desc);
1331 return desc->g ? (limit << 12) | 0xfff : limit;
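/*
 * Worked example (editorial): a descriptor with limit field 0x1f and
 * g == 1 scales to (0x1f << 12) | 0xfff == 0x1ffff, i.e. page
 * granularity turns the 20-bit limit field into a byte limit.
 */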
1334 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
1335 struct x86_emulate_ops *ops,
1336 u16 selector, struct desc_ptr *dt)
1338 if (selector & 1 << 2) {
1339 struct desc_struct desc;
1340 memset(dt, 0, sizeof *dt);
1341 if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu))
1344 dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
1345 dt->address = get_desc_base(&desc);
1347 ops->get_gdt(dt, ctxt->vcpu);
1350 /* allowed just for 8-byte segment descriptors */
1351 static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1352 struct x86_emulate_ops *ops,
1353 u16 selector, struct desc_struct *desc)
1356 u16 index = selector >> 3;
1361 get_descriptor_table_ptr(ctxt, ops, selector, &dt);
1363 if (dt.size < index * 8 + 7) {
1364 kvm_inject_gp(ctxt->vcpu, selector & 0xfffc);
1365 return X86EMUL_PROPAGATE_FAULT;
1367 addr = dt.address + index * 8;
1368 ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
1369 if (ret == X86EMUL_PROPAGATE_FAULT)
1370 kvm_inject_page_fault(ctxt->vcpu, addr, err);
1375 /* allowed just for 8-byte segment descriptors */
1376 static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1377 struct x86_emulate_ops *ops,
1378 u16 selector, struct desc_struct *desc)
1381 u16 index = selector >> 3;
1386 get_descriptor_table_ptr(ctxt, ops, selector, &dt);
1388 if (dt.size < index * 8 + 7) {
1389 kvm_inject_gp(ctxt->vcpu, selector & 0xfffc);
1390 return X86EMUL_PROPAGATE_FAULT;
1393 addr = dt.address + index * 8;
1394 ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
1395 if (ret == X86EMUL_PROPAGATE_FAULT)
1396 kvm_inject_page_fault(ctxt->vcpu, addr, err);
1401 static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1402 struct x86_emulate_ops *ops,
1403 u16 selector, int seg)
1405 struct desc_struct seg_desc;
1407 unsigned err_vec = GP_VECTOR;
1409 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
1412 memset(&seg_desc, 0, sizeof seg_desc);
1414 if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
1415 || ctxt->mode == X86EMUL_MODE_REAL) {
1416 /* set real mode segment descriptor */
1417 set_desc_base(&seg_desc, selector << 4);
1418 set_desc_limit(&seg_desc, 0xffff);
1425 /* NULL selector is not valid for TR, CS and SS */
1426 if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
1430 /* TR should be in GDT only */
1431 if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
1434 if (null_selector) /* for NULL selector skip all following checks */
1437 ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc);
1438 if (ret != X86EMUL_CONTINUE)
1441 err_code = selector & 0xfffc;
1442 err_vec = GP_VECTOR;
1444 /* can't load a system descriptor into a segment register */
1445 if (seg <= VCPU_SREG_GS && !seg_desc.s)
1449 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
1455 cpl = ops->cpl(ctxt->vcpu);
1460 * segment is not a writable data segment, or the selector's
1461 * RPL != CPL, or the descriptor's DPL != CPL
1463 if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
1467 if (!(seg_desc.type & 8))
1470 if (seg_desc.type & 4) {
1476 if (rpl > cpl || dpl != cpl)
1479 /* CS(RPL) <- CPL */
1480 selector = (selector & 0xfffc) | cpl;
1483 if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
1486 case VCPU_SREG_LDTR:
1487 if (seg_desc.s || seg_desc.type != 2)
1490 default: /* DS, ES, FS, or GS */
1492 * segment is not a data or readable code segment or
1493 * ((segment is a data or nonconforming code segment)
1494 * and (both RPL and CPL > DPL))
1496 if ((seg_desc.type & 0xa) == 0x8 ||
1497 (((seg_desc.type & 0xc) != 0xc) &&
1498 (rpl > dpl && cpl > dpl)))
1504 /* mark segment as accessed */
1506 ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc);
1507 if (ret != X86EMUL_CONTINUE)
1511 ops->set_segment_selector(selector, seg, ctxt->vcpu);
1512 ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu);
1513 return X86EMUL_CONTINUE;
1515 kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code);
1516 return X86EMUL_PROPAGATE_FAULT;
1519 static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
1521 struct decode_cache *c = &ctxt->decode;
1523 c->dst.type = OP_MEM;
1524 c->dst.bytes = c->op_bytes;
1525 c->dst.val = c->src.val;
1526 register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
1527 c->dst.ptr = (void *) register_address(c, ss_base(ctxt),
1528 c->regs[VCPU_REGS_RSP]);
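/*
 * Editorial note: emulate_push() only stages the store; the actual
 * guest-memory write of c->dst happens later in writeback(), which is
 * also where a failed write aborts the instruction cleanly.
 */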
1531 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1532 struct x86_emulate_ops *ops,
1533 void *dest, int len)
1535 struct decode_cache *c = &ctxt->decode;
1538 rc = read_emulated(ctxt, ops, register_address(c, ss_base(ctxt),
1539 c->regs[VCPU_REGS_RSP]),
1541 if (rc != X86EMUL_CONTINUE)
1544 register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
1548 static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1549 struct x86_emulate_ops *ops,
1550 void *dest, int len)
1553 unsigned long val, change_mask;
1554 int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1555 int cpl = ops->cpl(ctxt->vcpu);
1557 rc = emulate_pop(ctxt, ops, &val, len);
1558 if (rc != X86EMUL_CONTINUE)
1561 change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
1562 | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;
1564 switch(ctxt->mode) {
1565 case X86EMUL_MODE_PROT64:
1566 case X86EMUL_MODE_PROT32:
1567 case X86EMUL_MODE_PROT16:
1569 change_mask |= EFLG_IOPL;
1571 change_mask |= EFLG_IF;
1573 case X86EMUL_MODE_VM86:
1575 kvm_inject_gp(ctxt->vcpu, 0);
1576 return X86EMUL_PROPAGATE_FAULT;
1578 change_mask |= EFLG_IF;
1580 default: /* real mode */
1581 change_mask |= (EFLG_IOPL | EFLG_IF);
1585 *(unsigned long *)dest =
1586 (ctxt->eflags & ~change_mask) | (val & change_mask);
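/*
 * Worked example (editorial): in protected mode with CPL == 3 and
 * IOPL == 0, neither EFLG_IOPL nor EFLG_IF enters change_mask, so a
 * popf that tries to set IF leaves both IF and IOPL unchanged.
 */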
1591 static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
1593 struct decode_cache *c = &ctxt->decode;
1594 struct kvm_segment segment;
1596 kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg);
1598 c->src.val = segment.selector;
1602 static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
1603 struct x86_emulate_ops *ops, int seg)
1605 struct decode_cache *c = &ctxt->decode;
1606 unsigned long selector;
1609 rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
1610 if (rc != X86EMUL_CONTINUE)
1613 rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg);
1617 static void emulate_pusha(struct x86_emulate_ctxt *ctxt)
1619 struct decode_cache *c = &ctxt->decode;
1620 unsigned long old_esp = c->regs[VCPU_REGS_RSP];
1621 int reg = VCPU_REGS_RAX;
1623 while (reg <= VCPU_REGS_RDI) {
1624 (reg == VCPU_REGS_RSP) ?
1625 (c->src.val = old_esp) : (c->src.val = c->regs[reg]);
1632 static int emulate_popa(struct x86_emulate_ctxt *ctxt,
1633 struct x86_emulate_ops *ops)
1635 struct decode_cache *c = &ctxt->decode;
1636 int rc = X86EMUL_CONTINUE;
1637 int reg = VCPU_REGS_RDI;
1639 while (reg >= VCPU_REGS_RAX) {
1640 if (reg == VCPU_REGS_RSP) {
1641 register_address_increment(c, &c->regs[VCPU_REGS_RSP],
1646 rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
1647 if (rc != X86EMUL_CONTINUE)
1654 static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
1655 struct x86_emulate_ops *ops)
1657 struct decode_cache *c = &ctxt->decode;
1659 return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
1662 static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
1664 struct decode_cache *c = &ctxt->decode;
1665 switch (c->modrm_reg) {
1667 emulate_2op_SrcB("rol", c->src, c->dst, ctxt->eflags);
1670 emulate_2op_SrcB("ror", c->src, c->dst, ctxt->eflags);
1673 emulate_2op_SrcB("rcl", c->src, c->dst, ctxt->eflags);
1676 emulate_2op_SrcB("rcr", c->src, c->dst, ctxt->eflags);
1678 case 4: /* sal/shl */
1679 case 6: /* sal/shl */
1680 emulate_2op_SrcB("sal", c->src, c->dst, ctxt->eflags);
1683 emulate_2op_SrcB("shr", c->src, c->dst, ctxt->eflags);
1686 emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);
1691 static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
1692 struct x86_emulate_ops *ops)
1694 struct decode_cache *c = &ctxt->decode;
1696 switch (c->modrm_reg) {
1697 case 0 ... 1: /* test */
1698 emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
1701 c->dst.val = ~c->dst.val;
1704 emulate_1op("neg", c->dst, ctxt->eflags);
1712 static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
1713 struct x86_emulate_ops *ops)
1715 struct decode_cache *c = &ctxt->decode;
1717 switch (c->modrm_reg) {
1719 emulate_1op("inc", c->dst, ctxt->eflags);
1722 emulate_1op("dec", c->dst, ctxt->eflags);
1724 case 2: /* call near abs */ {
1727 c->eip = c->src.val;
1728 c->src.val = old_eip;
1732 case 4: /* jmp abs */
1733 c->eip = c->src.val;
1739 return X86EMUL_CONTINUE;
1742 static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
1743 struct x86_emulate_ops *ops)
1745 struct decode_cache *c = &ctxt->decode;
1746 u64 old = c->dst.orig_val;
1748 if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
1749 ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) {
1751 c->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
1752 c->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
1753 ctxt->eflags &= ~EFLG_ZF;
1755 c->dst.val = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
1756 (u32) c->regs[VCPU_REGS_RBX];
1758 ctxt->eflags |= EFLG_ZF;
1760 return X86EMUL_CONTINUE;
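/*
 * Editorial note: this implements cmpxchg8b. If EDX:EAX matches the
 * 64-bit destination, ZF is set and ECX:EBX is staged into c->dst for
 * writeback; otherwise ZF is cleared and the old value is returned in
 * EDX:EAX.
 */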
1763 static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
1764 struct x86_emulate_ops *ops)
1766 struct decode_cache *c = &ctxt->decode;
1770 rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
1771 if (rc != X86EMUL_CONTINUE)
1773 if (c->op_bytes == 4)
1774 c->eip = (u32)c->eip;
1775 rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
1776 if (rc != X86EMUL_CONTINUE)
1778 rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);
1782 static inline int writeback(struct x86_emulate_ctxt *ctxt,
1783 struct x86_emulate_ops *ops)
1786 struct decode_cache *c = &ctxt->decode;
1788 switch (c->dst.type) {
1790 /* The 4-byte case *is* correct:
1791 * in 64-bit mode we zero-extend.
1793 switch (c->dst.bytes) {
1795 *(u8 *)c->dst.ptr = (u8)c->dst.val;
1798 *(u16 *)c->dst.ptr = (u16)c->dst.val;
1801 *c->dst.ptr = (u32)c->dst.val;
1802 break; /* 64b: zero-ext */
1804 *c->dst.ptr = c->dst.val;
1810 rc = ops->cmpxchg_emulated(
1811 (unsigned long)c->dst.ptr,
1817 rc = ops->write_emulated(
1818 (unsigned long)c->dst.ptr,
1822 if (rc != X86EMUL_CONTINUE)
1831 return X86EMUL_CONTINUE;
1834 static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
1836 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask);
1838 * an sti; sti sequence only disables interrupts for the first
1839 * instruction. So, if the last instruction, be it emulated or
1840 * not, left the system with the INT_STI flag enabled, it
1841 * means that the last instruction is an sti. We should not
1842 * leave the flag on in this case. The same goes for mov ss
1844 if (!(int_shadow & mask))
1845 ctxt->interruptibility = mask;
1849 setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
1850 struct kvm_segment *cs, struct kvm_segment *ss)
1852 memset(cs, 0, sizeof(struct kvm_segment));
1853 kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS);
1854 memset(ss, 0, sizeof(struct kvm_segment));
1856 cs->l = 0; /* will be adjusted later */
1857 cs->base = 0; /* flat segment */
1858 cs->g = 1; /* 4kb granularity */
1859 cs->limit = 0xffffffff; /* 4GB limit */
1860 cs->type = 0x0b; /* Read, Execute, Accessed */
1862 cs->dpl = 0; /* will be adjusted later */
1867 ss->base = 0; /* flat segment */
1868 ss->limit = 0xffffffff; /* 4GB limit */
1869 ss->g = 1; /* 4kb granularity */
1871 ss->type = 0x03; /* Read/Write, Accessed */
1872 ss->db = 1; /* 32bit stack segment */
1878 emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1880 struct decode_cache *c = &ctxt->decode;
1881 struct kvm_segment cs, ss;
1884 /* syscall is not available in real mode */
1885 if (ctxt->mode == X86EMUL_MODE_REAL ||
1886 ctxt->mode == X86EMUL_MODE_VM86) {
1887 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
1888 return X86EMUL_PROPAGATE_FAULT;
1891 setup_syscalls_segments(ctxt, &cs, &ss);
1893 ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
1895 cs.selector = (u16)(msr_data & 0xfffc);
1896 ss.selector = (u16)(msr_data + 8);
1898 if (is_long_mode(ctxt->vcpu)) {
1902 kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
1903 kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
1905 c->regs[VCPU_REGS_RCX] = c->eip;
1906 if (is_long_mode(ctxt->vcpu)) {
1907 #ifdef CONFIG_X86_64
1908 c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;
1910 ops->get_msr(ctxt->vcpu,
1911 ctxt->mode == X86EMUL_MODE_PROT64 ?
1912 MSR_LSTAR : MSR_CSTAR, &msr_data);
1915 ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
1916 ctxt->eflags &= ~(msr_data | EFLG_RF);
1920 ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
1921 c->eip = (u32)msr_data;
1923 ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
1926 return X86EMUL_CONTINUE;
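/*
 * Worked example (editorial): SYSCALL takes both selectors from
 * MSR_STAR bits 47:32. A STAR value of 0x0023001000000000 yields
 * CS == 0x0010 and SS == 0x0018 (CS + 8), matching the code above.
 */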
1930 emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1932 struct decode_cache *c = &ctxt->decode;
1933 struct kvm_segment cs, ss;
1936 /* inject #GP if in real mode */
1937 if (ctxt->mode == X86EMUL_MODE_REAL) {
1938 kvm_inject_gp(ctxt->vcpu, 0);
1939 return X86EMUL_PROPAGATE_FAULT;
1942 /* XXX sysenter/sysexit have not been tested in 64-bit mode.
1943 * Therefore, we inject an #UD.
1945 if (ctxt->mode == X86EMUL_MODE_PROT64) {
1946 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
1947 return X86EMUL_PROPAGATE_FAULT;
1950 setup_syscalls_segments(ctxt, &cs, &ss);
1952 ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
1953 switch (ctxt->mode) {
1954 case X86EMUL_MODE_PROT32:
1955 if ((msr_data & 0xfffc) == 0x0) {
1956 kvm_inject_gp(ctxt->vcpu, 0);
1957 return X86EMUL_PROPAGATE_FAULT;
1960 case X86EMUL_MODE_PROT64:
1961 if (msr_data == 0x0) {
1962 kvm_inject_gp(ctxt->vcpu, 0);
1963 return X86EMUL_PROPAGATE_FAULT;
1968 ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
1969 cs.selector = (u16)msr_data;
1970 cs.selector &= ~SELECTOR_RPL_MASK;
1971 ss.selector = cs.selector + 8;
1972 ss.selector &= ~SELECTOR_RPL_MASK;
1973 if (ctxt->mode == X86EMUL_MODE_PROT64
1974 || is_long_mode(ctxt->vcpu)) {
1979 kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
1980 kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
1982 ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
1985 ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
1986 c->regs[VCPU_REGS_RSP] = msr_data;
1988 return X86EMUL_CONTINUE;
1992 emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1994 struct decode_cache *c = &ctxt->decode;
1995 struct kvm_segment cs, ss;
1999 /* inject #GP if in real mode or Virtual 8086 mode */
2000 if (ctxt->mode == X86EMUL_MODE_REAL ||
2001 ctxt->mode == X86EMUL_MODE_VM86) {
2002 kvm_inject_gp(ctxt->vcpu, 0);
2003 return X86EMUL_PROPAGATE_FAULT;
2006 setup_syscalls_segments(ctxt, &cs, &ss);
2008 if ((c->rex_prefix & 0x8) != 0x0)
2009 usermode = X86EMUL_MODE_PROT64;
2011 usermode = X86EMUL_MODE_PROT32;
2015 ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
2017 case X86EMUL_MODE_PROT32:
2018 cs.selector = (u16)(msr_data + 16);
2019 if ((msr_data & 0xfffc) == 0x0) {
2020 kvm_inject_gp(ctxt->vcpu, 0);
2021 return X86EMUL_PROPAGATE_FAULT;
2023 ss.selector = (u16)(msr_data + 24);
2025 case X86EMUL_MODE_PROT64:
2026 cs.selector = (u16)(msr_data + 32);
2027 if (msr_data == 0x0) {
2028 kvm_inject_gp(ctxt->vcpu, 0);
2029 return X86EMUL_PROPAGATE_FAULT;
2031 ss.selector = cs.selector + 8;
2036 cs.selector |= SELECTOR_RPL_MASK;
2037 ss.selector |= SELECTOR_RPL_MASK;
2039 kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
2040 kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
2042 c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
2043 c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];
2045 return X86EMUL_CONTINUE;
2048 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt,
2049 struct x86_emulate_ops *ops)
2052 if (ctxt->mode == X86EMUL_MODE_REAL)
2054 if (ctxt->mode == X86EMUL_MODE_VM86)
2056 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
2057 return ops->cpl(ctxt->vcpu) > iopl;
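/*
 * Editorial example: with EFLAGS.IOPL == 0 and CPL == 3 this returns
 * true, so the I/O instruction must instead pass the TSS I/O-bitmap
 * check in emulator_io_port_access_allowed() below.
 */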
2060 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2061 struct x86_emulate_ops *ops,
2064 struct kvm_segment tr_seg;
2067 u8 perm, bit_idx = port & 0x7;
2068 unsigned mask = (1 << len) - 1;
2070 kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR);
2071 if (tr_seg.unusable)
2073 if (tr_seg.limit < 103)
2075 r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu,
2077 if (r != X86EMUL_CONTINUE)
2079 if (io_bitmap_ptr + port/8 > tr_seg.limit)
2081 r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1,
2083 if (r != X86EMUL_CONTINUE)
2085 if ((perm >> bit_idx) & mask)
2090 static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
2091 struct x86_emulate_ops *ops,
2094 if (emulator_bad_iopl(ctxt, ops))
2095 if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
2100 static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt,
2101 struct x86_emulate_ops *ops,
2104 struct desc_struct desc;
2105 if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu))
2106 return get_desc_base(&desc);
2111 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2112 struct x86_emulate_ops *ops,
2113 struct tss_segment_16 *tss)
2115 struct decode_cache *c = &ctxt->decode;
2118 tss->flag = ctxt->eflags;
2119 tss->ax = c->regs[VCPU_REGS_RAX];
2120 tss->cx = c->regs[VCPU_REGS_RCX];
2121 tss->dx = c->regs[VCPU_REGS_RDX];
2122 tss->bx = c->regs[VCPU_REGS_RBX];
2123 tss->sp = c->regs[VCPU_REGS_RSP];
2124 tss->bp = c->regs[VCPU_REGS_RBP];
2125 tss->si = c->regs[VCPU_REGS_RSI];
2126 tss->di = c->regs[VCPU_REGS_RDI];
2128 tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
2129 tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
2130 tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
2131 tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
2132 tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
2135 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2136 struct x86_emulate_ops *ops,
2137 struct tss_segment_16 *tss)
2139 struct decode_cache *c = &ctxt->decode;
2143 ctxt->eflags = tss->flag | 2;	/* bit 1 of EFLAGS is always set */
2144 c->regs[VCPU_REGS_RAX] = tss->ax;
2145 c->regs[VCPU_REGS_RCX] = tss->cx;
2146 c->regs[VCPU_REGS_RDX] = tss->dx;
2147 c->regs[VCPU_REGS_RBX] = tss->bx;
2148 c->regs[VCPU_REGS_RSP] = tss->sp;
2149 c->regs[VCPU_REGS_RBP] = tss->bp;
2150 c->regs[VCPU_REGS_RSI] = tss->si;
2151 c->regs[VCPU_REGS_RDI] = tss->di;
2154 * SDM says that segment selectors are loaded before segment
2157 ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu);
2158 ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
2159 ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
2160 ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
2161 ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
2164 * Now load segment descriptors. If a fault happens at this stage,
2165 * it is handled in the context of the new task.
2167 ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR);
2168 if (ret != X86EMUL_CONTINUE)
2170 ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
2171 if (ret != X86EMUL_CONTINUE)
2173 ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
2174 if (ret != X86EMUL_CONTINUE)
2176 ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
2177 if (ret != X86EMUL_CONTINUE)
2179 ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
2180 if (ret != X86EMUL_CONTINUE)
2183 return X86EMUL_CONTINUE;
2186 static int task_switch_16(struct x86_emulate_ctxt *ctxt,
2187 struct x86_emulate_ops *ops,
2188 u16 tss_selector, u16 old_tss_sel,
2189 ulong old_tss_base, struct desc_struct *new_desc)
2191 struct tss_segment_16 tss_seg;
2193 u32 err, new_tss_base = get_desc_base(new_desc);
2195 ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2197 if (ret == X86EMUL_PROPAGATE_FAULT) {
2198 /* FIXME: need to provide precise fault address */
2199 kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
2203 save_state_to_tss16(ctxt, ops, &tss_seg);
2205 ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2207 if (ret == X86EMUL_PROPAGATE_FAULT) {
2208 /* FIXME: need to provide precise fault address */
2209 kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
2213 ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2215 if (ret == X86EMUL_PROPAGATE_FAULT) {
2216 /* FIXME: need to provide precise fault address */
2217 kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
2221 if (old_tss_sel != 0xffff) {
2222 tss_seg.prev_task_link = old_tss_sel;
2224 ret = ops->write_std(new_tss_base,
2225 &tss_seg.prev_task_link,
2226 sizeof tss_seg.prev_task_link,
2228 if (ret == X86EMUL_PROPAGATE_FAULT) {
2229 /* FIXME: need to provide precise fault address */
2230 kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
2235 return load_state_from_tss16(ctxt, ops, &tss_seg);
2238 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
2239 struct x86_emulate_ops *ops,
2240 struct tss_segment_32 *tss)
2242 struct decode_cache *c = &ctxt->decode;
2244 tss->cr3 = ops->get_cr(3, ctxt->vcpu);
2246 tss->eflags = ctxt->eflags;
2247 tss->eax = c->regs[VCPU_REGS_RAX];
2248 tss->ecx = c->regs[VCPU_REGS_RCX];
2249 tss->edx = c->regs[VCPU_REGS_RDX];
2250 tss->ebx = c->regs[VCPU_REGS_RBX];
2251 tss->esp = c->regs[VCPU_REGS_RSP];
2252 tss->ebp = c->regs[VCPU_REGS_RBP];
2253 tss->esi = c->regs[VCPU_REGS_RSI];
2254 tss->edi = c->regs[VCPU_REGS_RDI];
2256 tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
2257 tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
2258 tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
2259 tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
2260 tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu);
2261 tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu);
2262 tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
2265 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2266 struct x86_emulate_ops *ops,
2267 struct tss_segment_32 *tss)
2269 struct decode_cache *c = &ctxt->decode;
2272 ops->set_cr(3, tss->cr3, ctxt->vcpu);
2274 ctxt->eflags = tss->eflags | 2;	/* bit 1 of EFLAGS is always set */
2275 c->regs[VCPU_REGS_RAX] = tss->eax;
2276 c->regs[VCPU_REGS_RCX] = tss->ecx;
2277 c->regs[VCPU_REGS_RDX] = tss->edx;
2278 c->regs[VCPU_REGS_RBX] = tss->ebx;
2279 c->regs[VCPU_REGS_RSP] = tss->esp;
2280 c->regs[VCPU_REGS_RBP] = tss->ebp;
2281 c->regs[VCPU_REGS_RSI] = tss->esi;
2282 c->regs[VCPU_REGS_RDI] = tss->edi;
2285 * SDM says that segment selectors are loaded before segment
2288 ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu);
2289 ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
2290 ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
2291 ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
2292 ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
2293 ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu);
2294 ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu);
2297 * Now load segment descriptors. If a fault happens at this stage,
2298 * it is handled in the context of the new task.
2300 ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR);
2301 if (ret != X86EMUL_CONTINUE)
2303 ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
2304 if (ret != X86EMUL_CONTINUE)
2306 ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
2307 if (ret != X86EMUL_CONTINUE)
2309 ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
2310 if (ret != X86EMUL_CONTINUE)
2312 ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
2313 if (ret != X86EMUL_CONTINUE)
2315 ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS);
2316 if (ret != X86EMUL_CONTINUE)
2318 ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS);
2319 if (ret != X86EMUL_CONTINUE)
2322 return X86EMUL_CONTINUE;
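/*
 * Loading every selector before any descriptor mirrors the SDM's
 * task-switch ordering: once the selectors are committed, a fault taken
 * while loading a descriptor is delivered in the context of the new
 * task, with the new task's register state already visible.
 */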
2325 static int task_switch_32(struct x86_emulate_ctxt *ctxt,
2326 struct x86_emulate_ops *ops,
2327 u16 tss_selector, u16 old_tss_sel,
2328 ulong old_tss_base, struct desc_struct *new_desc)
2330 struct tss_segment_32 tss_seg;
2332 u32 err, new_tss_base = get_desc_base(new_desc);
2334 ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2336 if (ret == X86EMUL_PROPAGATE_FAULT) {
2337 /* FIXME: need to provide precise fault address */
2338 kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
2342 save_state_to_tss32(ctxt, ops, &tss_seg);
2344 ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2346 if (ret == X86EMUL_PROPAGATE_FAULT) {
2347 /* FIXME: need to provide precise fault address */
2348 kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
2352 ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2354 if (ret == X86EMUL_PROPAGATE_FAULT) {
2355 /* FIXME: need to provide precise fault address */
2356 kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
2360 if (old_tss_sel != 0xffff) {
2361 tss_seg.prev_task_link = old_tss_sel;
2363 ret = ops->write_std(new_tss_base,
2364 &tss_seg.prev_task_link,
2365 sizeof tss_seg.prev_task_link,
2367 if (ret == X86EMUL_PROPAGATE_FAULT) {
2368 /* FIXME: need to provide precise fault address */
2369 kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
2374 return load_state_from_tss32(ctxt, ops, &tss_seg);
2377 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2378 struct x86_emulate_ops *ops,
2379 u16 tss_selector, int reason,
2380 bool has_error_code, u32 error_code)
2382 struct desc_struct curr_tss_desc, next_tss_desc;
2384 u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu);
2385 ulong old_tss_base =
2386 get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR);
2389 /* FIXME: old_tss_base == ~0 ? */
2391 ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc);
2392 if (ret != X86EMUL_CONTINUE)
2394 ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc);
2395 if (ret != X86EMUL_CONTINUE)
2398 /* FIXME: check that next_tss_desc is tss */
2400 if (reason != TASK_SWITCH_IRET) {
2401 if ((tss_selector & 3) > next_tss_desc.dpl ||
2402 ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) {
2403 kvm_inject_gp(ctxt->vcpu, 0);
2404 return X86EMUL_PROPAGATE_FAULT;
2408 desc_limit = desc_limit_scaled(&next_tss_desc);
2409 if (!next_tss_desc.p ||
2410 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
2411 desc_limit < 0x2b)) {
2412 kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR,
2413 tss_selector & 0xfffc);
2414 return X86EMUL_PROPAGATE_FAULT;
2417 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
2418 curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
2419 write_segment_descriptor(ctxt, ops, old_tss_sel,
2423 if (reason == TASK_SWITCH_IRET)
2424 ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
2426 /* set back link to prev task only if NT bit is set in eflags;
2427 note that old_tss_sel is not used after this point */
2428 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
2429 old_tss_sel = 0xffff;
2431 if (next_tss_desc.type & 8)
2432 ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel,
2433 old_tss_base, &next_tss_desc);
2435 ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel,
2436 old_tss_base, &next_tss_desc);
2437 if (ret != X86EMUL_CONTINUE)
2440 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
2441 ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
2443 if (reason != TASK_SWITCH_IRET) {
2444 next_tss_desc.type |= (1 << 1); /* set busy flag */
2445 write_segment_descriptor(ctxt, ops, tss_selector,
2449 ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu);
2450 ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu);
2451 ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu);
2453 if (has_error_code) {
2454 struct decode_cache *c = &ctxt->decode;
2456 c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
2458 c->src.val = (unsigned long) error_code;
2465 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
2466 struct x86_emulate_ops *ops,
2467 u16 tss_selector, int reason,
2468 bool has_error_code, u32 error_code)
2470 struct decode_cache *c = &ctxt->decode;
2473 memset(c, 0, sizeof(struct decode_cache));
2475 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
2476 c->dst.type = OP_NONE;
2478 rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason,
2479 has_error_code, error_code);
2481 if (rc == X86EMUL_CONTINUE) {
2482 memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
2483 kvm_rip_write(ctxt->vcpu, c->eip);
2484 rc = writeback(ctxt, ops);
2487 return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
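/*
 * emulator_task_switch() is the entry point used by the rest of KVM; a
 * caller is expected to look roughly like the sketch below (the wrapper
 * name and context layout are assumptions, not part of this file):
 *
 *	int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector,
 *			    int reason, bool has_error_code, u32 error_code)
 *	{
 *		...
 *		return emulator_task_switch(&vcpu->arch.emulate_ctxt,
 *					    &emulate_ops, tss_selector,
 *					    reason, has_error_code,
 *					    error_code);
 *	}
 */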
2490 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base,
2491 int reg, struct operand *op)
2493 struct decode_cache *c = &ctxt->decode;
2494 int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
2496 register_address_increment(c, &c->regs[reg], df * op->bytes);
2497 op->ptr = (unsigned long *)register_address(c, base, c->regs[reg]);
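/*
 * string_addr_inc() implements the direction flag: with DF clear the
 * index register advances by the operand size, with DF set it moves
 * backwards. E.g. for a 4-byte 'movsd' with DF = 0, both RSI and RDI
 * are incremented by 4 and op->ptr is recomputed from the new index.
 */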
2501 x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
2504 struct decode_cache *c = &ctxt->decode;
2505 int rc = X86EMUL_CONTINUE;
2506 int saved_dst_type = c->dst.type;
2508 ctxt->interruptibility = 0;
2509 ctxt->decode.mem_read.pos = 0;
2511 /* Shadow copy of register state. Committed on successful emulation.
2512 * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't modify them.
2516 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
2518 if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
2519 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
2523 /* LOCK prefix is allowed only with some instructions */
2524 if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) {
2525 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
2529 /* Privileged instruction can be executed only in CPL=0 */
2530 if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
2531 kvm_inject_gp(ctxt->vcpu, 0);
2535 if (c->rep_prefix && (c->d & String)) {
2536 ctxt->restart = true;
2537 /* All REP prefixes have the same first termination condition */
2538 if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) {
2540 ctxt->restart = false;
2541 kvm_rip_write(ctxt->vcpu, c->eip);
2544 /* The second termination condition only applies to REPE
2545 * and REPNE. Test whether the repeat string operation prefix is
2546 * REPE/REPZ or REPNE/REPNZ and, if so, check the
2547 * corresponding termination condition:
2548 * - if REPE/REPZ and ZF = 0 then done (e.g. 'repe cmpsb' stops at the first mismatch)
2549 * - if REPNE/REPNZ and ZF = 1 then done
2551 if ((c->b == 0xa6) || (c->b == 0xa7) ||
2552 (c->b == 0xae) || (c->b == 0xaf)) {
2553 if ((c->rep_prefix == REPE_PREFIX) &&
2554 ((ctxt->eflags & EFLG_ZF) == 0))
2556 if ((c->rep_prefix == REPNE_PREFIX) &&
2557 ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))
2563 if (c->src.type == OP_MEM) {
2564 rc = read_emulated(ctxt, ops, (unsigned long)c->src.ptr,
2565 c->src.valptr, c->src.bytes);
2566 if (rc != X86EMUL_CONTINUE)
2568 c->src.orig_val = c->src.val;
2571 if (c->src2.type == OP_MEM) {
2572 rc = read_emulated(ctxt, ops, (unsigned long)c->src2.ptr,
2573 &c->src2.val, c->src2.bytes);
2574 if (rc != X86EMUL_CONTINUE)
2578 if ((c->d & DstMask) == ImplicitOps)
2582 if ((c->dst.type == OP_MEM) && !(c->d & Mov)) {
2583 /* optimisation - avoid slow emulated read if Mov */
2584 rc = read_emulated(ctxt, ops, (unsigned long)c->dst.ptr,
2585 &c->dst.val, c->dst.bytes);
2586 if (rc != X86EMUL_CONTINUE)
2589 c->dst.orig_val = c->dst.val;
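/*
 * A memory destination that is not write-only (no Mov flag) is read
 * above so that the two-operand emulation macros can compute flags
 * against the real destination value; dst.orig_val additionally serves
 * as the comparand when a LOCK prefix makes writeback() store through
 * an emulated cmpxchg.
 */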
2599 emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
2601 case 0x06: /* push es */
2602 emulate_push_sreg(ctxt, VCPU_SREG_ES);
2604 case 0x07: /* pop es */
2605 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
2606 if (rc != X86EMUL_CONTINUE)
2611 emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
2613 case 0x0e: /* push cs */
2614 emulate_push_sreg(ctxt, VCPU_SREG_CS);
2618 emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
2620 case 0x16: /* push ss */
2621 emulate_push_sreg(ctxt, VCPU_SREG_SS);
2623 case 0x17: /* pop ss */
2624 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
2625 if (rc != X86EMUL_CONTINUE)
2630 emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
2632 case 0x1e: /* push ds */
2633 emulate_push_sreg(ctxt, VCPU_SREG_DS);
2635 case 0x1f: /* pop ds */
2636 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
2637 if (rc != X86EMUL_CONTINUE)
2642 emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
2646 emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
2650 emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
2654 emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
2656 case 0x40 ... 0x47: /* inc r16/r32 */
2657 emulate_1op("inc", c->dst, ctxt->eflags);
2659 case 0x48 ... 0x4f: /* dec r16/r32 */
2660 emulate_1op("dec", c->dst, ctxt->eflags);
2662 case 0x50 ... 0x57: /* push reg */
2665 case 0x58 ... 0x5f: /* pop reg */
2667 rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
2668 if (rc != X86EMUL_CONTINUE)
2671 case 0x60: /* pusha */
2672 emulate_pusha(ctxt);
2674 case 0x61: /* popa */
2675 rc = emulate_popa(ctxt, ops);
2676 if (rc != X86EMUL_CONTINUE)
2679 case 0x63: /* movsxd */
2680 if (ctxt->mode != X86EMUL_MODE_PROT64)
2681 goto cannot_emulate;
2682 c->dst.val = (s32) c->src.val;
2684 case 0x68: /* push imm */
2685 case 0x6a: /* push imm8 */
2688 case 0x6c: /* insb */
2689 case 0x6d: /* insw/insd */
2690 c->dst.bytes = min(c->dst.bytes, 4u);
2691 if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
2693 kvm_inject_gp(ctxt->vcpu, 0);
2696 if (!pio_in_emulated(ctxt, ops, c->dst.bytes,
2697 c->regs[VCPU_REGS_RDX], &c->dst.val))
2698 goto done; /* IO is needed, skip writeback */
2700 case 0x6e: /* outsb */
2701 case 0x6f: /* outsw/outsd */
2702 c->src.bytes = min(c->src.bytes, 4u);
2703 if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
2705 kvm_inject_gp(ctxt->vcpu, 0);
2708 ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX],
2709 &c->src.val, 1, ctxt->vcpu);
2711 c->dst.type = OP_NONE; /* nothing to writeback */
2713 case 0x70 ... 0x7f: /* jcc (short) */
2714 if (test_cc(c->b, ctxt->eflags))
2715 jmp_rel(c, c->src.val);
2717 case 0x80 ... 0x83: /* Grp1 */
2718 switch (c->modrm_reg) {
2738 emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
2740 case 0x86 ... 0x87: /* xchg */
2742 /* Write back the register source. */
2743 switch (c->dst.bytes) {
2745 *(u8 *) c->src.ptr = (u8) c->dst.val;
2748 *(u16 *) c->src.ptr = (u16) c->dst.val;
2751 *c->src.ptr = (u32) c->dst.val;
2752 break; /* 64b reg: zero-extend */
2754 *c->src.ptr = c->dst.val;
2758 * Write back the memory destination with implicit LOCK prefix.
2761 c->dst.val = c->src.val;
2764 case 0x88 ... 0x8b: /* mov */
2766 case 0x8c: { /* mov r/m, sreg */
2767 struct kvm_segment segreg;
2769 if (c->modrm_reg <= VCPU_SREG_GS)
2770 kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg);
2772 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
2775 c->dst.val = segreg.selector;
2778 case 0x8d: /* lea r16/r32, m */
2779 c->dst.val = c->modrm_ea;
2781 case 0x8e: { /* mov seg, r/m16 */
2786 if (c->modrm_reg == VCPU_SREG_CS ||
2787 c->modrm_reg > VCPU_SREG_GS) {
2788 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
2792 if (c->modrm_reg == VCPU_SREG_SS)
2793 toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS);
2795 rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg);
2797 c->dst.type = OP_NONE; /* Disable writeback. */
2800 case 0x8f: /* pop (sole member of Grp1a) */
2801 rc = emulate_grp1a(ctxt, ops);
2802 if (rc != X86EMUL_CONTINUE)
2805 case 0x90: /* nop / xchg r8,rax */
2806 if (c->dst.ptr == (unsigned long *)&c->regs[VCPU_REGS_RAX]) {
2807 c->dst.type = OP_NONE; /* nop */
2810 case 0x91 ... 0x97: /* xchg reg,rax */
2811 c->src.type = OP_REG;
2812 c->src.bytes = c->op_bytes;
2813 c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX];
2814 c->src.val = *(c->src.ptr);
2816 case 0x9c: /* pushf */
2817 c->src.val = (unsigned long) ctxt->eflags;
2820 case 0x9d: /* popf */
2821 c->dst.type = OP_REG;
2822 c->dst.ptr = (unsigned long *) &ctxt->eflags;
2823 c->dst.bytes = c->op_bytes;
2824 rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
2825 if (rc != X86EMUL_CONTINUE)
2828 case 0xa0 ... 0xa1: /* mov */
2829 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
2830 c->dst.val = c->src.val;
2832 case 0xa2 ... 0xa3: /* mov */
2833 c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX];
2835 case 0xa4 ... 0xa5: /* movs */
2837 case 0xa6 ... 0xa7: /* cmps */
2838 c->dst.type = OP_NONE; /* Disable writeback. */
2839 DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);
2841 case 0xaa ... 0xab: /* stos */
2842 c->dst.val = c->regs[VCPU_REGS_RAX];
2844 case 0xac ... 0xad: /* lods */
2846 case 0xae ... 0xaf: /* scas */
2847 DPRINTF("Urk! I don't handle SCAS.\n");
2848 goto cannot_emulate;
2849 case 0xb0 ... 0xbf: /* mov r, imm */
2854 case 0xc3: /* ret */
2855 c->dst.type = OP_REG;
2856 c->dst.ptr = &c->eip;
2857 c->dst.bytes = c->op_bytes;
2858 goto pop_instruction;
2859 case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
2861 c->dst.val = c->src.val;
2863 case 0xcb: /* ret far */
2864 rc = emulate_ret_far(ctxt, ops);
2865 if (rc != X86EMUL_CONTINUE)
2868 case 0xd0 ... 0xd1: /* Grp2 */
2872 case 0xd2 ... 0xd3: /* Grp2 */
2873 c->src.val = c->regs[VCPU_REGS_RCX];
2876 case 0xe4: /* inb */
2879 case 0xe6: /* outb */
2880 case 0xe7: /* out */
2882 case 0xe8: /* call (near) */ {
2883 long int rel = c->src.val;
2884 c->src.val = (unsigned long) c->eip;
2889 case 0xe9: /* jmp rel */
2891 case 0xea: { /* jmp far */
2894 memcpy(&sel, c->src.valptr + c->op_bytes, 2);
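/*
 * A ptr16:16/ptr16:32 immediate stores the offset first (op_bytes wide)
 * followed by the 16-bit segment selector, hence the selector is read
 * from c->src.valptr + c->op_bytes here and the offset from
 * c->src.valptr below.
 */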
2896 if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS))
2900 memcpy(&c->eip, c->src.valptr, c->op_bytes);
2904 jmp: /* jmp rel short */
2905 jmp_rel(c, c->src.val);
2906 c->dst.type = OP_NONE; /* Disable writeback. */
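/*
 * The in/out cases below (like insb/outsb above) gate port access via
 * emulator_io_permited(), which is assumed to consult the I/O permission
 * bitmap in the TSS whenever CPL > IOPL or the guest is in virtual-8086
 * mode; a denied port raises #GP(0).
 */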
2908 case 0xec: /* in al,dx */
2909 case 0xed: /* in (e/r)ax,dx */
2910 c->src.val = c->regs[VCPU_REGS_RDX];
2912 c->dst.bytes = min(c->dst.bytes, 4u);
2913 if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
2914 kvm_inject_gp(ctxt->vcpu, 0);
2917 if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val,
2919 goto done; /* IO is needed */
2921 case 0xee: /* out al,dx */
2922 case 0xef: /* out (e/r)ax,dx */
2923 c->src.val = c->regs[VCPU_REGS_RDX];
2925 c->dst.bytes = min(c->dst.bytes, 4u);
2926 if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
2927 kvm_inject_gp(ctxt->vcpu, 0);
2930 ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1,
2932 c->dst.type = OP_NONE; /* Disable writeback. */
2934 case 0xf4: /* hlt */
2935 ctxt->vcpu->arch.halt_request = 1;
2937 case 0xf5: /* cmc */
2938 /* complement carry flag from eflags reg */
2939 ctxt->eflags ^= EFLG_CF;
2940 c->dst.type = OP_NONE; /* Disable writeback. */
2942 case 0xf6 ... 0xf7: /* Grp3 */
2943 if (!emulate_grp3(ctxt, ops))
2944 goto cannot_emulate;
2946 case 0xf8: /* clc */
2947 ctxt->eflags &= ~EFLG_CF;
2948 c->dst.type = OP_NONE; /* Disable writeback. */
2950 case 0xfa: /* cli */
2951 if (emulator_bad_iopl(ctxt, ops))
2952 kvm_inject_gp(ctxt->vcpu, 0);
2954 ctxt->eflags &= ~X86_EFLAGS_IF;
2955 c->dst.type = OP_NONE; /* Disable writeback. */
2958 case 0xfb: /* sti */
2959 if (emulator_bad_iopl(ctxt, ops))
2960 kvm_inject_gp(ctxt->vcpu, 0);
2962 toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI);
2963 ctxt->eflags |= X86_EFLAGS_IF;
2964 c->dst.type = OP_NONE; /* Disable writeback. */
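/*
 * The STI case above records an interrupt shadow: architecturally, a
 * 'sti' that sets IF keeps interrupts blocked until the following
 * instruction completes, so e.g. 'sti; hlt' cannot be interrupted
 * between the two instructions. toggle_interruptibility() conveys that
 * window to the rest of KVM.
 */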
2967 case 0xfc: /* cld */
2968 ctxt->eflags &= ~EFLG_DF;
2969 c->dst.type = OP_NONE; /* Disable writeback. */
2971 case 0xfd: /* std */
2972 ctxt->eflags |= EFLG_DF;
2973 c->dst.type = OP_NONE; /* Disable writeback. */
2975 case 0xfe: /* Grp4 */
2977 rc = emulate_grp45(ctxt, ops);
2978 if (rc != X86EMUL_CONTINUE)
2981 case 0xff: /* Grp5 */
2982 if (c->modrm_reg == 5)
2988 rc = writeback(ctxt, ops);
2989 if (rc != X86EMUL_CONTINUE)
2993 * restore dst type in case the decode results are reused
2994 * (happens for string instructions)
2996 c->dst.type = saved_dst_type;
2998 if ((c->d & SrcMask) == SrcSI)
2999 string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI,
3002 if ((c->d & DstMask) == DstDI)
3003 string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst);
3005 if (c->rep_prefix && (c->d & String)) {
3006 struct read_cache *rc = &ctxt->decode.io_read;
3007 register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
3009 * Re-enter the guest when the pio read ahead buffer is empty or,
3010 * if it is not used, after every 1024 iterations.
3012 if ((rc->end == 0 && !(c->regs[VCPU_REGS_RCX] & 0x3ff)) ||
3013 (rc->end != 0 && rc->end == rc->pos))
3014 ctxt->restart = false;
3017 * reset the read cache here in case the string instruction is restarted
3020 ctxt->decode.mem_read.end = 0;
3021 /* Commit shadow register state. */
3022 memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
3023 kvm_rip_write(ctxt->vcpu, c->eip);
3024 ops->set_rflags(ctxt->vcpu, ctxt->eflags);
3027 return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
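/* Two-byte (0x0f-prefixed) opcodes are dispatched below. */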
3031 case 0x01: /* lgdt, lidt, lmsw */
3032 switch (c->modrm_reg) {
3034 unsigned long address;
3036 case 0: /* vmcall */
3037 if (c->modrm_mod != 3 || c->modrm_rm != 1)
3038 goto cannot_emulate;
3040 rc = kvm_fix_hypercall(ctxt->vcpu);
3041 if (rc != X86EMUL_CONTINUE)
3044 /* Let the processor re-execute the fixed hypercall */
3046 /* Disable writeback. */
3047 c->dst.type = OP_NONE;
3050 rc = read_descriptor(ctxt, ops, c->src.ptr,
3051 &size, &address, c->op_bytes);
3052 if (rc != X86EMUL_CONTINUE)
3054 realmode_lgdt(ctxt->vcpu, size, address);
3055 /* Disable writeback. */
3056 c->dst.type = OP_NONE;
3058 case 3: /* lidt/vmmcall */
3059 if (c->modrm_mod == 3) {
3060 switch (c->modrm_rm) {
3062 rc = kvm_fix_hypercall(ctxt->vcpu);
3063 if (rc != X86EMUL_CONTINUE)
3067 goto cannot_emulate;
3070 rc = read_descriptor(ctxt, ops, c->src.ptr,
3073 if (rc != X86EMUL_CONTINUE)
3075 realmode_lidt(ctxt->vcpu, size, address);
3077 /* Disable writeback. */
3078 c->dst.type = OP_NONE;
3082 c->dst.val = ops->get_cr(0, ctxt->vcpu);
3085 ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0ful) |
3086 (c->src.val & 0x0f), ctxt->vcpu);
3087 c->dst.type = OP_NONE;
3089 case 5: /* not defined */
3090 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
3093 emulate_invlpg(ctxt->vcpu, c->modrm_ea);
3094 /* Disable writeback. */
3095 c->dst.type = OP_NONE;
3098 goto cannot_emulate;
3101 case 0x05: /* syscall */
3102 rc = emulate_syscall(ctxt, ops);
3103 if (rc != X86EMUL_CONTINUE)
3109 emulate_clts(ctxt->vcpu);
3110 c->dst.type = OP_NONE;
3112 case 0x08: /* invd */
3113 case 0x09: /* wbinvd */
3114 case 0x0d: /* GrpP (prefetch) */
3115 case 0x18: /* Grp16 (prefetch/nop) */
3116 c->dst.type = OP_NONE;
3118 case 0x20: /* mov cr, reg */
3119 switch (c->modrm_reg) {
3123 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
3126 c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu);
3127 c->dst.type = OP_NONE; /* no writeback */
3129 case 0x21: /* mov from dr to reg */
3130 if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
3131 (c->modrm_reg == 4 || c->modrm_reg == 5)) {
3132 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
3135 ops->get_dr(c->modrm_reg, &c->regs[c->modrm_rm], ctxt->vcpu);
3136 c->dst.type = OP_NONE; /* no writeback */
3138 case 0x22: /* mov reg, cr */
3139 ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu);
3140 c->dst.type = OP_NONE;
3142 case 0x23: /* mov from reg to dr */
3143 if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
3144 (c->modrm_reg == 4 || c->modrm_reg == 5)) {
3145 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
3149 ops->set_dr(c->modrm_reg, c->regs[c->modrm_rm] &
3150 ((ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U),
3152 c->dst.type = OP_NONE; /* no writeback */
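/*
 * Both mov-dr cases above implement the CR4.DE rule: with debug
 * extensions enabled, any reference to DR4/DR5 raises #UD; with CR4.DE
 * clear, DR4/DR5 simply alias DR6/DR7. In 64-bit mode the full register
 * value is used, otherwise it is truncated to 32 bits.
 */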
3156 msr_data = (u32)c->regs[VCPU_REGS_RAX]
3157 | ((u64)c->regs[VCPU_REGS_RDX] << 32);
3158 if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) {
3159 kvm_inject_gp(ctxt->vcpu, 0);
3162 rc = X86EMUL_CONTINUE;
3163 c->dst.type = OP_NONE;
3167 if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) {
3168 kvm_inject_gp(ctxt->vcpu, 0);
3171 c->regs[VCPU_REGS_RAX] = (u32)msr_data;
3172 c->regs[VCPU_REGS_RDX] = msr_data >> 32;
3174 rc = X86EMUL_CONTINUE;
3175 c->dst.type = OP_NONE;
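/*
 * wrmsr/rdmsr above follow the architectural register convention: ECX
 * selects the MSR and EDX:EAX carries the 64-bit value (high half in
 * EDX). A rejected access injects #GP(0) instead of completing.
 */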
3177 case 0x34: /* sysenter */
3178 rc = emulate_sysenter(ctxt, ops);
3179 if (rc != X86EMUL_CONTINUE)
3184 case 0x35: /* sysexit */
3185 rc = emulate_sysexit(ctxt, ops);
3186 if (rc != X86EMUL_CONTINUE)
3191 case 0x40 ... 0x4f: /* cmov */
3192 c->dst.val = c->dst.orig_val = c->src.val;
3193 if (!test_cc(c->b, ctxt->eflags))
3194 c->dst.type = OP_NONE; /* no writeback */
3196 case 0x80 ... 0x8f: /* jnz rel, etc*/
3197 if (test_cc(c->b, ctxt->eflags))
3198 jmp_rel(c, c->src.val);
3199 c->dst.type = OP_NONE;
3201 case 0xa0: /* push fs */
3202 emulate_push_sreg(ctxt, VCPU_SREG_FS);
3204 case 0xa1: /* pop fs */
3205 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
3206 if (rc != X86EMUL_CONTINUE)
3211 c->dst.type = OP_NONE;
3212 /* only subword offset */
3213 c->src.val &= (c->dst.bytes << 3) - 1;
3214 emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
3216 case 0xa4: /* shld imm8, r, r/m */
3217 case 0xa5: /* shld cl, r, r/m */
3218 emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
3220 case 0xa8: /* push gs */
3221 emulate_push_sreg(ctxt, VCPU_SREG_GS);
3223 case 0xa9: /* pop gs */
3224 rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
3225 if (rc != X86EMUL_CONTINUE)
3230 /* only subword offset */
3231 c->src.val &= (c->dst.bytes << 3) - 1;
3232 emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
3234 case 0xac: /* shrd imm8, r, r/m */
3235 case 0xad: /* shrd cl, r, r/m */
3236 emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
3238 case 0xae: /* clflush */
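/*
 * CMPXCHG (next case): EAX is compared with the destination; on a match
 * ZF is set and the source is stored to the destination, otherwise ZF
 * is cleared and the destination value is loaded into EAX. The
 * emulation reuses the 'cmp' macro for the flags and then picks the
 * writeback target based on ZF.
 */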
3240 case 0xb0 ... 0xb1: /* cmpxchg */
3242 * Save real source value, then compare EAX against destination.
3245 c->src.orig_val = c->src.val;
3246 c->src.val = c->regs[VCPU_REGS_RAX];
3247 emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
3248 if (ctxt->eflags & EFLG_ZF) {
3249 /* Success: write back to memory. */
3250 c->dst.val = c->src.orig_val;
3252 /* Failure: write the value we saw to EAX. */
3253 c->dst.type = OP_REG;
3254 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
3259 /* only subword offset */
3260 c->src.val &= (c->dst.bytes << 3) - 1;
3261 emulate_2op_SrcV_nobyte("btr", c->src, c->dst, ctxt->eflags);
3263 case 0xb6 ... 0xb7: /* movzx */
3264 c->dst.bytes = c->op_bytes;
3265 c->dst.val = (c->d & ByteOp) ? (u8) c->src.val
3268 case 0xba: /* Grp8 */
3269 switch (c->modrm_reg & 3) {
3282 /* only subword offset */
3283 c->src.val &= (c->dst.bytes << 3) - 1;
3284 emulate_2op_SrcV_nobyte("btc", c->src, c->dst, ctxt->eflags);
3286 case 0xbe ... 0xbf: /* movsx */
3287 c->dst.bytes = c->op_bytes;
3288 c->dst.val = (c->d & ByteOp) ? (s8) c->src.val :
3291 case 0xc3: /* movnti */
3292 c->dst.bytes = c->op_bytes;
3293 c->dst.val = (c->op_bytes == 4) ? (u32) c->src.val :
3296 case 0xc7: /* Grp9 (cmpxchg8b) */
3297 rc = emulate_grp9(ctxt, ops);
3298 if (rc != X86EMUL_CONTINUE)
3305 DPRINTF("Cannot emulate %02x\n", c->b);