/******************************************************************************
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
 *
 * Copyright (c) 2005 Keir Fraser
 *
 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
 * privileged instructions:
 *
 * Copyright (C) 2006 Qumranet
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
 */

#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#include <asm/kvm_emulate.h>
#include <linux/stringify.h>
#include <asm/debugreg.h>
#define OpNone             0ull
#define OpImplicit         1ull  /* No generic decode */
#define OpReg              2ull  /* Register */
#define OpMem              3ull  /* Memory */
#define OpAcc              4ull  /* Accumulator: AL/AX/EAX/RAX */
#define OpDI               5ull  /* ES:DI/EDI/RDI */
#define OpMem64            6ull  /* Memory, 64-bit */
#define OpImmUByte         7ull  /* Zero-extended 8-bit immediate */
#define OpDX               8ull  /* DX register */
#define OpCL               9ull  /* CL register (for shifts) */
#define OpImmByte         10ull  /* 8-bit sign extended immediate */
#define OpOne             11ull  /* Implied 1 */
#define OpImm             12ull  /* Sign extended up to 32-bit immediate */
#define OpMem16           13ull  /* Memory operand (16-bit). */
#define OpMem32           14ull  /* Memory operand (32-bit). */
#define OpImmU            15ull  /* Immediate operand, zero extended */
#define OpSI              16ull  /* SI/ESI/RSI */
#define OpImmFAddr        17ull  /* Immediate far address */
#define OpMemFAddr        18ull  /* Far address in memory */
#define OpImmU16          19ull  /* Immediate operand, 16 bits, zero extended */
#define OpES              20ull  /* ES */
#define OpCS              21ull  /* CS */
#define OpSS              22ull  /* SS */
#define OpDS              23ull  /* DS */
#define OpFS              24ull  /* FS */
#define OpGS              25ull  /* GS */
#define OpMem8            26ull  /* 8-bit zero extended memory operand */
#define OpImm64           27ull  /* Sign extended 16/32/64-bit immediate */
#define OpXLat            28ull  /* memory at BX/EBX/RBX + zero-extended AL */
#define OpAccLo           29ull  /* Low part of extended acc (AX/AX/EAX/RAX) */
#define OpAccHi           30ull  /* High part of extended acc (-/DX/EDX/RDX) */

#define OpBits             5  /* Width of operand field */
#define OpMask             ((1ull << OpBits) - 1)
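/*
 * Illustrative sketch (not from the original source): an opcode's u64 flags
 * word packs three 5-bit operand-type fields plus attribute bits. Assuming
 * the DstShift/SrcShift/Src2Shift values defined below, a table entry such
 * as (DstMem | SrcImm | ModRM) decodes as:
 *
 *   dst_type  = (d >> DstShift)  & OpMask;    -> OpMem
 *   src_type  = (d >> SrcShift)  & OpMask;    -> OpImm
 *   src2_type = (d >> Src2Shift) & OpMask;    -> OpNone
 */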
/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * not be handled.
 */

/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp      (1<<0)  /* 8-bit operands. */
/* Destination operand type. */
#define DstShift    1
#define ImplicitOps (OpImplicit << DstShift)
#define DstReg      (OpReg << DstShift)
#define DstMem      (OpMem << DstShift)
#define DstAcc      (OpAcc << DstShift)
#define DstDI       (OpDI << DstShift)
#define DstMem64    (OpMem64 << DstShift)
#define DstMem16    (OpMem16 << DstShift)
#define DstImmUByte (OpImmUByte << DstShift)
#define DstDX       (OpDX << DstShift)
#define DstAccLo    (OpAccLo << DstShift)
#define DstMask     (OpMask << DstShift)
/* Source operand type. */
#define SrcShift    6
#define SrcNone     (OpNone << SrcShift)
#define SrcReg      (OpReg << SrcShift)
#define SrcMem      (OpMem << SrcShift)
#define SrcMem16    (OpMem16 << SrcShift)
#define SrcMem32    (OpMem32 << SrcShift)
#define SrcImm      (OpImm << SrcShift)
#define SrcImmByte  (OpImmByte << SrcShift)
#define SrcOne      (OpOne << SrcShift)
#define SrcImmUByte (OpImmUByte << SrcShift)
#define SrcImmU     (OpImmU << SrcShift)
#define SrcSI       (OpSI << SrcShift)
#define SrcXLat     (OpXLat << SrcShift)
#define SrcImmFAddr (OpImmFAddr << SrcShift)
#define SrcMemFAddr (OpMemFAddr << SrcShift)
#define SrcAcc      (OpAcc << SrcShift)
#define SrcImmU16   (OpImmU16 << SrcShift)
#define SrcImm64    (OpImm64 << SrcShift)
#define SrcDX       (OpDX << SrcShift)
#define SrcMem8     (OpMem8 << SrcShift)
#define SrcAccHi    (OpAccHi << SrcShift)
#define SrcMask     (OpMask << SrcShift)
#define BitOp       (1<<11)
#define MemAbs      (1<<12)  /* Memory operand is absolute displacement */
#define String      (1<<13)  /* String instruction (rep capable) */
#define Stack       (1<<14)  /* Stack instruction (push/pop) */
#define GroupMask   (7<<15)  /* Opcode uses one of the group mechanisms */
#define Group       (1<<15)  /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (2<<15)  /* Alternate decoding of mod == 3 */
#define Prefix      (3<<15)  /* Instruction varies with 66/f2/f3 prefix */
#define RMExt       (4<<15)  /* Opcode extension in ModRM r/m if mod == 3 */
#define Escape      (5<<15)  /* Escape to coprocessor instruction */
#define InstrDual   (6<<15)  /* Alternate instruction decoding of mod == 3 */
#define ModeDual    (7<<15)  /* Different instruction for 32/64 bit */
#define Sse         (1<<18)  /* SSE Vector instruction */
/* Generic ModRM decode. */
#define ModRM       (1<<19)
/* Destination is only written; never read. */
#define Mov         (1<<20)
#define Prot        (1<<21)  /* instruction generates #UD if not in prot-mode */
#define EmulateOnUD (1<<22)  /* Emulate if unsupported by the host */
#define NoAccess    (1<<23)  /* Don't access memory (lea/invlpg/verr etc) */
#define Op3264      (1<<24)  /* Operand is 64b in long mode, 32b otherwise */
#define Undefined   (1<<25)  /* No Such Instruction */
#define Lock        (1<<26)  /* lock prefix is allowed for the instruction */
#define Priv        (1<<27)  /* instruction generates #GP if current CPL != 0 */
#define No64        (1<<28)
#define PageTable   (1 << 29)  /* instruction used to write page table */
#define NotImpl     (1 << 30)  /* instruction is not implemented */
/* Source 2 operand type */
#define Src2Shift   (31)
#define Src2None    (OpNone << Src2Shift)
#define Src2Mem     (OpMem << Src2Shift)
#define Src2CL      (OpCL << Src2Shift)
#define Src2ImmByte (OpImmByte << Src2Shift)
#define Src2One     (OpOne << Src2Shift)
#define Src2Imm     (OpImm << Src2Shift)
#define Src2ES      (OpES << Src2Shift)
#define Src2CS      (OpCS << Src2Shift)
#define Src2SS      (OpSS << Src2Shift)
#define Src2DS      (OpDS << Src2Shift)
#define Src2FS      (OpFS << Src2Shift)
#define Src2GS      (OpGS << Src2Shift)
#define Src2Mask    (OpMask << Src2Shift)
#define Mmx         ((u64)1 << 40)  /* MMX Vector instruction */
#define Aligned     ((u64)1 << 41)  /* Explicitly aligned (e.g. MOVDQA) */
#define Unaligned   ((u64)1 << 42)  /* Explicitly unaligned (e.g. MOVDQU) */
#define Avx         ((u64)1 << 43)  /* Advanced Vector Extensions */
#define Fastop      ((u64)1 << 44)  /* Use opcode::u.fastop */
#define NoWrite     ((u64)1 << 45)  /* No writeback */
#define SrcWrite    ((u64)1 << 46)  /* Write back src operand */
#define NoMod       ((u64)1 << 47)  /* Mod field is ignored */
#define Intercept   ((u64)1 << 48)  /* Has valid intercept field */
#define CheckPerm   ((u64)1 << 49)  /* Has valid check_perm field */
#define PrivUD      ((u64)1 << 51)  /* #UD instead of #GP on CPL > 0 */
#define NearBranch  ((u64)1 << 52)  /* Near branches */
#define No16        ((u64)1 << 53)  /* No 16 bit operand */
#define IncSP       ((u64)1 << 54)  /* SP is incremented before ModRM calc */

#define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
#define X2(x...) x, x
#define X3(x...) X2(x), x
#define X4(x...) X2(x), X2(x)
#define X5(x...) X4(x), x
#define X6(x...) X4(x), X2(x)
#define X7(x...) X4(x), X3(x)
#define X8(x...) X4(x), X4(x)
#define X16(x...) X8(x), X8(x)
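/*
 * Illustrative note (not in the original excerpt): these macros simply
 * repeat their argument list, so an opcode-table row written as X4(entry)
 * expands to "entry, entry, entry, entry", which keeps the 256-entry
 * decode tables compact.
 */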
#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
#define FASTOP_SIZE 8

/*
 * fastop functions have a special calling convention:
 *
 * dst:    rax        (in/out)
 * src:    rdx        (in/out)
 * src2:   rcx        (in)
 * flags:  rflags     (in/out)
 * ex:     rsi        (in:fastop pointer, out:zero if exception)
 *
 * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
 * different operand sizes can be reached by calculation, rather than a jump
 * table (which would be bigger than the code).
 *
 * fastop functions are declared as taking a never-defined fastop parameter,
 * so they can't be called from C directly.
 */
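/*
 * Worked example (illustrative sketch, not part of the original excerpt):
 * because every size variant is FASTOP_SIZE (8) bytes long and emitted in
 * b/w/l/q order, the dispatcher can reach the right variant by arithmetic
 * instead of a jump table, roughly:
 *
 *   fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
 *
 * i.e. a 1-byte operand adds +0, 2-byte +8, 4-byte +16, 8-byte +24.
 */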
struct opcode {
	u64 flags : 56;
	u64 intercept : 8;
	union {
		int (*execute)(struct x86_emulate_ctxt *ctxt);
		const struct opcode *group;
		const struct group_dual *gdual;
		const struct gprefix *gprefix;
		const struct escape *esc;
		const struct instr_dual *idual;
		const struct mode_dual *mdual;
		void (*fastop)(struct fastop *fake);
	} u;
	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
};

struct group_dual {
	struct opcode mod012[8];
	struct opcode mod3[8];
};

struct gprefix {
	struct opcode pfx_no;
	struct opcode pfx_66;
	struct opcode pfx_f2;
	struct opcode pfx_f3;
};

struct escape {
	struct opcode op[8];
	struct opcode high[64];
};

struct instr_dual {
	struct opcode mod012;
	struct opcode mod3;
};

struct mode_dual {
	struct opcode mode32;
	struct opcode mode64;
};
#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a

enum x86_transfer_type {
	X86_TRANSFER_NONE,
	X86_TRANSFER_CALL_JMP,
	X86_TRANSFER_RET,
	X86_TRANSFER_TASK_SWITCH,
};
static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	if (!(ctxt->regs_valid & (1 << nr))) {
		ctxt->regs_valid |= 1 << nr;
		ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
	}
	return ctxt->_regs[nr];
}

static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	ctxt->regs_valid |= 1 << nr;
	ctxt->regs_dirty |= 1 << nr;
	return &ctxt->_regs[nr];
}

static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
{
	reg_read(ctxt, nr);
	return reg_write(ctxt, nr);
}

static void writeback_registers(struct x86_emulate_ctxt *ctxt)
{
	unsigned reg;

	for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
		ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
}

static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
{
	ctxt->regs_dirty = 0;
	ctxt->regs_valid = 0;
}
/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 */
#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
		     X86_EFLAGS_PF|X86_EFLAGS_CF)

#ifdef CONFIG_X86_64
#define ON64(x) x
#else
#define ON64(x)
#endif

static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));

#define FOP_FUNC(name) \
	".align " __stringify(FASTOP_SIZE) " \n\t" \
	".type " name ", @function \n\t" \
	name ":\n\t"

#define FOP_RET   "ret \n\t"

#define FOP_START(op) \
	extern void em_##op(struct fastop *fake); \
	asm(".pushsection .text, \"ax\" \n\t" \
	    ".global em_" #op " \n\t" \
	    FOP_FUNC("em_" #op)

#define FOP_END \
	    ".popsection")

#define FOPNOP() \
	FOP_FUNC(__stringify(__UNIQUE_ID(nop))) \
	FOP_RET

#define FOP1E(op, dst) \
	FOP_FUNC(#op "_" #dst) \
	"10: " #op " %" #dst " \n\t" FOP_RET

#define FOP1EEX(op, dst) \
	FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)

#define FASTOP1(op) \
	FOP_START(op) \
	FOP1E(op##b, al) \
	FOP1E(op##w, ax) \
	FOP1E(op##l, eax) \
	ON64(FOP1E(op##q, rax)) \
	FOP_END

/* 1-operand, using src2 (for MUL/DIV r/m) */
#define FASTOP1SRC2(op, name) \
	FOP_START(name) \
	FOP1E(op, cl) \
	FOP1E(op, cx) \
	FOP1E(op, ecx) \
	ON64(FOP1E(op, rcx)) \
	FOP_END

/* 1-operand, using src2 (for MUL/DIV r/m), with exceptions */
#define FASTOP1SRC2EX(op, name) \
	FOP_START(name) \
	FOP1EEX(op, cl) \
	FOP1EEX(op, cx) \
	FOP1EEX(op, ecx) \
	ON64(FOP1EEX(op, rcx)) \
	FOP_END

#define FOP2E(op, dst, src) \
	FOP_FUNC(#op "_" #dst "_" #src) \
	#op " %" #src ", %" #dst " \n\t" FOP_RET
#define FASTOP2(op) \
	FOP_START(op) \
	FOP2E(op##b, al, dl) \
	FOP2E(op##w, ax, dx) \
	FOP2E(op##l, eax, edx) \
	ON64(FOP2E(op##q, rax, rdx)) \
	FOP_END

/* 2 operand, word only */
#define FASTOP2W(op) \
	FOP_START(op) \
	FOPNOP() \
	FOP2E(op##w, ax, dx) \
	FOP2E(op##l, eax, edx) \
	ON64(FOP2E(op##q, rax, rdx)) \
	FOP_END

/* 2 operand, src is CL */
#define FASTOP2CL(op) \
	FOP_START(op) \
	FOP2E(op##b, al, cl) \
	FOP2E(op##w, ax, cl) \
	FOP2E(op##l, eax, cl) \
	ON64(FOP2E(op##q, rax, cl)) \
	FOP_END

/* 2 operand, src and dest are reversed */
#define FASTOP2R(op, name) \
	FOP_START(name) \
	FOP2E(op##b, dl, al) \
	FOP2E(op##w, dx, ax) \
	FOP2E(op##l, edx, eax) \
	ON64(FOP2E(op##q, rdx, rax)) \
	FOP_END

#define FOP3E(op, dst, src, src2) \
	FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
	#op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET

/* 3-operand, word-only, src2=cl */
#define FASTOP3WCL(op) \
	FOP_START(op) \
	FOPNOP() \
	FOP3E(op##w, ax, dx, cl) \
	FOP3E(op##l, eax, edx, cl) \
	ON64(FOP3E(op##q, rax, rdx, cl)) \
	FOP_END
/* Special case for SETcc - 1 instruction per cc */
#define FOP_SETCC(op) \
	".align 4 \n\t" \
	".type " #op ", @function \n\t" \
	#op ": \n\t" \
	#op " %al \n\t" \
	FOP_RET

asm(".global kvm_fastop_exception \n"
    "kvm_fastop_exception: xor %esi, %esi; ret");
FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET
FOP_END;
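/*
 * Note (added for clarity): the _ASM_EXTABLE entry emitted by FOP1EEX above
 * points any faulting fastop instruction (e.g. a DIV raising #DE) at
 * kvm_fastop_exception, which zeroes %esi. Per the fastop calling
 * convention, the "ex" value therefore reads back as zero exactly when an
 * exception occurred.
 */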
static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
				    enum x86_intercept intercept,
				    enum x86_intercept_stage stage)
{
	struct x86_instruction_info info = {
		.intercept  = intercept,
		.rep_prefix = ctxt->rep_prefix,
		.modrm_mod  = ctxt->modrm_mod,
		.modrm_reg  = ctxt->modrm_reg,
		.modrm_rm   = ctxt->modrm_rm,
		.src_val    = ctxt->src.val64,
		.dst_val    = ctxt->dst.val64,
		.src_bytes  = ctxt->src.bytes,
		.dst_bytes  = ctxt->dst.bytes,
		.ad_bytes   = ctxt->ad_bytes,
		.next_rip   = ctxt->eip,
	};

	return ctxt->ops->intercept(ctxt, &info, stage);
}

static void assign_masked(ulong *dest, ulong src, ulong mask)
{
	*dest = (*dest & ~mask) | (src & mask);
}

static void assign_register(unsigned long *reg, u64 val, int bytes)
{
	/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
	switch (bytes) {
	case 1:
		*(u8 *)reg = (u8)val;
		break;
	case 2:
		*(u16 *)reg = (u16)val;
		break;
	case 4:
		*reg = (u32)val;
		break;	/* 64b: zero-extend */
	case 8:
		*reg = val;
		break;
	}
}

static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
{
	return (1UL << (ctxt->ad_bytes << 3)) - 1;
}
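/*
 * Worked example (added for clarity): with 2-byte addressing ad_mask()
 * yields (1UL << 16) - 1 = 0xffff, with 4-byte addressing 0xffffffff, so
 * address_mask() below truncates register-held addresses to the current
 * address size.
 */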
static ulong stack_mask(struct x86_emulate_ctxt *ctxt)
{
	u16 sel;
	struct desc_struct ss;

	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return ~0UL;
	ctxt->ops->get_segment(ctxt, &sel, &ss, NULL, VCPU_SREG_SS);
	return ~0U >> ((ss.d ^ 1) * 16);  /* d=0: 0xffff; d=1: 0xffffffff */
}

static int stack_size(struct x86_emulate_ctxt *ctxt)
{
	return (__fls(stack_mask(ctxt)) + 1) >> 3;
}

/* Access/update address held in a register, based on addressing mode. */
static inline unsigned long
address_mask(struct x86_emulate_ctxt *ctxt, unsigned long reg)
{
	if (ctxt->ad_bytes == sizeof(unsigned long))
		return reg;
	else
		return reg & ad_mask(ctxt);
}

static inline unsigned long
register_address(struct x86_emulate_ctxt *ctxt, int reg)
{
	return address_mask(ctxt, reg_read(ctxt, reg));
}

static void masked_increment(ulong *reg, ulong mask, int inc)
{
	assign_masked(reg, *reg + inc, mask);
}

static void
register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
{
	ulong *preg = reg_rmw(ctxt, reg);

	assign_register(preg, *preg + inc, ctxt->ad_bytes);
}

static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
{
	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
}

static u32 desc_limit_scaled(struct desc_struct *desc)
{
	u32 limit = get_desc_limit(desc);

	return desc->g ? (limit << 12) | 0xfff : limit;
}
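/*
 * Worked example (added for clarity): a descriptor with g=1 and a raw
 * limit of 0xfffff scales to (0xfffff << 12) | 0xfff = 0xffffffff,
 * i.e. the full 4 GiB segment.
 */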
static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
{
	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
		return 0;

	return ctxt->ops->get_cached_segment_base(ctxt, seg);
}

static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
			     u32 error, bool valid)
{
	ctxt->exception.vector = vec;
	ctxt->exception.error_code = error;
	ctxt->exception.error_code_valid = valid;
	return X86EMUL_PROPAGATE_FAULT;
}

static int emulate_db(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, DB_VECTOR, 0, false);
}

static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, GP_VECTOR, err, true);
}

static int emulate_ss(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, SS_VECTOR, err, true);
}

static int emulate_ud(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, UD_VECTOR, 0, false);
}

static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
{
	return emulate_exception(ctxt, TS_VECTOR, err, true);
}

static int emulate_de(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, DE_VECTOR, 0, false);
}

static int emulate_nm(struct x86_emulate_ctxt *ctxt)
{
	return emulate_exception(ctxt, NM_VECTOR, 0, false);
}

static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
{
	u16 selector;
	struct desc_struct desc;

	ctxt->ops->get_segment(ctxt, &selector, &desc, NULL, seg);
	return selector;
}
static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
				 unsigned seg)
{
	u16 dummy;
	u32 base3;
	struct desc_struct desc;

	ctxt->ops->get_segment(ctxt, &dummy, &desc, &base3, seg);
	ctxt->ops->set_segment(ctxt, selector, &desc, base3, seg);
}

/*
 * x86 defines three classes of vector instructions: explicitly
 * aligned, explicitly unaligned, and the rest, which change behaviour
 * depending on whether they're AVX encoded or not.
 *
 * Also included is CMPXCHG16B which is not a vector instruction, yet it is
 * subject to the same check.
 */
static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size)
{
	if (likely(size < 16))
		return false;

	if (ctxt->d & Aligned)
		return true;
	else if (ctxt->d & Unaligned)
		return false;
	else if (ctxt->d & Avx)
		return false;
	else
		return true;
}
static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
				       struct segmented_address addr,
				       unsigned *max_size, unsigned size,
				       bool write, bool fetch,
				       enum x86emul_mode mode, ulong *linear)
{
	struct desc_struct desc;
	bool usable;
	ulong la;
	u32 lim;
	u16 sel;

	la = seg_base(ctxt, addr.seg) + addr.ea;
	*max_size = 0;
	switch (mode) {
	case X86EMUL_MODE_PROT64:
		*linear = la;
		if (is_noncanonical_address(la))
			goto bad;

		*max_size = min_t(u64, ~0u, (1ull << 48) - la);
		if (size > *max_size)
			goto bad;
		break;
	default:
		*linear = la = (u32)la;
		usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
						addr.seg);
		if (!usable)
			goto bad;
		/* code segment in protected mode or read-only data segment */
		if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
		     || !(desc.type & 2)) && write)
			goto bad;
		/* unreadable code segment */
		if (!fetch && (desc.type & 8) && !(desc.type & 2))
			goto bad;
		lim = desc_limit_scaled(&desc);
		if (!(desc.type & 8) && (desc.type & 4)) {
			/* expand-down segment */
			if (addr.ea <= lim)
				goto bad;
			lim = desc.d ? 0xffffffff : 0xffff;
		}
		if (addr.ea > lim)
			goto bad;
		if (lim == 0xffffffff)
			*max_size = ~0u;
		else {
			*max_size = (u64)lim + 1 - addr.ea;
			if (size > *max_size)
				goto bad;
		}
		break;
	}
	if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
		return emulate_gp(ctxt, 0);
	return X86EMUL_CONTINUE;
bad:
	if (addr.seg == VCPU_SREG_SS)
		return emulate_ss(ctxt, 0);
	else
		return emulate_gp(ctxt, 0);
}
static int linearize(struct x86_emulate_ctxt *ctxt,
		     struct segmented_address addr,
		     unsigned size, bool write,
		     ulong *linear)
{
	unsigned max_size;

	return __linearize(ctxt, addr, &max_size, size, write, false,
			   ctxt->mode, linear);
}

static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
			     enum x86emul_mode mode)
{
	ulong linear;
	int rc;
	unsigned max_size;
	struct segmented_address addr = { .seg = VCPU_SREG_CS,
					  .ea = dst };

	if (ctxt->op_bytes != sizeof(unsigned long))
		addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
	rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
	if (rc == X86EMUL_CONTINUE)
		ctxt->_eip = addr.ea;
	return rc;
}

static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
{
	return assign_eip(ctxt, dst, ctxt->mode);
}
static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
			  const struct desc_struct *cs_desc)
{
	enum x86emul_mode mode = ctxt->mode;
	int rc;

#ifdef CONFIG_X86_64
	if (ctxt->mode >= X86EMUL_MODE_PROT16) {
		if (cs_desc->l) {
			u64 efer = 0;

			ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
			if (efer & EFER_LMA)
				mode = X86EMUL_MODE_PROT64;
		} else
			mode = X86EMUL_MODE_PROT32; /* temporary value */
	}
#endif
	if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
		mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
	rc = assign_eip(ctxt, dst, mode);
	if (rc == X86EMUL_CONTINUE)
		ctxt->mode = mode;
	return rc;
}

static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
{
	return assign_eip_near(ctxt, ctxt->_eip + rel);
}
static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
			      struct segmented_address addr,
			      void *data,
			      unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, false, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
}

/*
 * Prefetch the remaining bytes of the instruction without crossing page
 * boundary if they are not in fetch_cache yet.
 */
static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
{
	int rc;
	unsigned size, max_size;
	unsigned long linear;
	int cur_size = ctxt->fetch.end - ctxt->fetch.data;
	struct segmented_address addr = { .seg = VCPU_SREG_CS,
					  .ea = ctxt->eip + cur_size };

	/*
	 * We do not know exactly how many bytes will be needed, and
	 * __linearize is expensive, so fetch as much as possible. We
	 * just have to avoid going beyond the 15 byte limit, the end
	 * of the segment, or the end of the page.
	 *
	 * __linearize is called with size 0 so that it does not do any
	 * boundary check itself. Instead, we use max_size to check
	 * against op_size.
	 */
	rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
			 &linear);
	if (unlikely(rc != X86EMUL_CONTINUE))
		return rc;

	size = min_t(unsigned, 15UL ^ cur_size, max_size);
	size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));

	/*
	 * One instruction can only straddle two pages,
	 * and one has been loaded at the beginning of
	 * x86_decode_insn. So, if not enough bytes are
	 * available, we must have hit the 15-byte limit.
	 */
	if (unlikely(size < op_size))
		return emulate_gp(ctxt, 0);

	rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
			      size, &ctxt->exception);
	if (unlikely(rc != X86EMUL_CONTINUE))
		return rc;
	ctxt->fetch.end += size;
	return X86EMUL_CONTINUE;
}
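/*
 * Note (added for clarity): since cur_size never exceeds 15 here,
 * "15UL ^ cur_size" above is just a branch-free way of computing
 * 15 - cur_size, the number of instruction bytes still allowed.
 */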
static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
					       unsigned size)
{
	unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;

	if (unlikely(done_size < size))
		return __do_insn_fetch_bytes(ctxt, size - done_size);
	else
		return X86EMUL_CONTINUE;
}
/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _ctxt) \
({	_type _x; \
	rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \
	if (rc != X86EMUL_CONTINUE) \
		goto done; \
	ctxt->_eip += sizeof(_type); \
	_x = *(_type __aligned(1) *) ctxt->fetch.ptr; \
	ctxt->fetch.ptr += sizeof(_type); \
	_x; \
})

#define insn_fetch_arr(_arr, _size, _ctxt) \
({ \
	rc = do_insn_fetch_bytes(_ctxt, _size); \
	if (rc != X86EMUL_CONTINUE) \
		goto done; \
	ctxt->_eip += (_size); \
	memcpy(_arr, ctxt->fetch.ptr, _size); \
	ctxt->fetch.ptr += (_size); \
})
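/*
 * Usage sketch (illustrative, not part of the original excerpt): decode
 * code consumes immediates and displacements through these macros, e.g.
 *
 *   ctxt->modrm = insn_fetch(u8, ctxt);
 *
 * Both macros assign to a local "rc" and jump to a "done" label on
 * failure, so callers must provide both.
 */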
/*
 * Given the 'reg' portion of a ModRM byte, and a register block, return a
 * pointer into the block that addresses the relevant register.
 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
 */
static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
			     int byteop)
{
	void *p;
	int highbyte_regs = (ctxt->rex_prefix == 0) && byteop;

	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
		p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
	else
		p = reg_rmw(ctxt, modrm_reg);
	return p;
}

static int read_descriptor(struct x86_emulate_ctxt *ctxt,
			   struct segmented_address addr,
			   u16 *size, unsigned long *address, int op_bytes)
{
	int rc;

	if (op_bytes == 2)
		op_bytes = 3;
	*address = 0;
	rc = segmented_read_std(ctxt, addr, size, 2);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	addr.ea += 2;
	rc = segmented_read_std(ctxt, addr, address, op_bytes);
	return rc;
}
FASTOP1SRC2(mul, mul_ex);
FASTOP1SRC2(imul, imul_ex);
FASTOP1SRC2EX(div, div_ex);
FASTOP1SRC2EX(idiv, idiv_ex);

FASTOP2R(cmp, cmp_r);

static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
{
	/* If src is zero, do not writeback, but update flags */
	if (ctxt->src.val == 0)
		ctxt->dst.type = OP_NONE;
	return fastop(ctxt, em_bsf);
}

static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
{
	/* If src is zero, do not writeback, but update flags */
	if (ctxt->src.val == 0)
		ctxt->dst.type = OP_NONE;
	return fastop(ctxt, em_bsr);
}
static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
{
	u8 rc;
	void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf);

	flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
	asm("push %[flags]; popf; call *%[fastop]"
	    : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags));
	return rc;
}
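/*
 * Illustrative example (not from the original source): condition code 4
 * is "e/z", so test_cc(4, flags) dispatches to the setz stub in the
 * em_setcc table (em_setcc + 4 * 4, each stub being 4 bytes) and returns
 * 1 iff ZF is set in the supplied flags.
 */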
static void fetch_register_operand(struct operand *op)
{
	switch (op->bytes) {
	case 1:
		op->val = *(u8 *)op->addr.reg;
		break;
	case 2:
		op->val = *(u16 *)op->addr.reg;
		break;
	case 4:
		op->val = *(u32 *)op->addr.reg;
		break;
	case 8:
		op->val = *(u64 *)op->addr.reg;
		break;
	}
}

static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
{
	ctxt->ops->get_fpu(ctxt);
	switch (reg) {
	case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
	case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
	case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break;
	case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break;
	case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break;
	case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break;
	case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break;
	case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break;
#ifdef CONFIG_X86_64
	case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break;
	case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break;
	case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break;
	case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break;
	case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break;
	case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break;
	case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break;
	case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break;
#endif
	default: BUG();
	}
	ctxt->ops->put_fpu(ctxt);
}
static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
			  int reg)
{
	ctxt->ops->get_fpu(ctxt);
	switch (reg) {
	case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
	case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
	case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break;
	case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break;
	case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break;
	case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break;
	case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break;
	case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break;
#ifdef CONFIG_X86_64
	case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break;
	case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break;
	case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break;
	case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break;
	case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break;
	case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break;
	case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break;
	case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break;
#endif
	default: BUG();
	}
	ctxt->ops->put_fpu(ctxt);
}
static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
	ctxt->ops->get_fpu(ctxt);
	switch (reg) {
	case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
	case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
	case 2: asm("movq %%mm2, %0" : "=m"(*data)); break;
	case 3: asm("movq %%mm3, %0" : "=m"(*data)); break;
	case 4: asm("movq %%mm4, %0" : "=m"(*data)); break;
	case 5: asm("movq %%mm5, %0" : "=m"(*data)); break;
	case 6: asm("movq %%mm6, %0" : "=m"(*data)); break;
	case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
	default: BUG();
	}
	ctxt->ops->put_fpu(ctxt);
}

static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
{
	ctxt->ops->get_fpu(ctxt);
	switch (reg) {
	case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
	case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
	case 2: asm("movq %0, %%mm2" : : "m"(*data)); break;
	case 3: asm("movq %0, %%mm3" : : "m"(*data)); break;
	case 4: asm("movq %0, %%mm4" : : "m"(*data)); break;
	case 5: asm("movq %0, %%mm5" : : "m"(*data)); break;
	case 6: asm("movq %0, %%mm6" : : "m"(*data)); break;
	case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
	default: BUG();
	}
	ctxt->ops->put_fpu(ctxt);
}
static int em_fninit(struct x86_emulate_ctxt *ctxt)
{
	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	ctxt->ops->get_fpu(ctxt);
	asm volatile("fninit");
	ctxt->ops->put_fpu(ctxt);
	return X86EMUL_CONTINUE;
}

static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
{
	u16 fcw;

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	ctxt->ops->get_fpu(ctxt);
	asm volatile("fnstcw %0": "+m"(fcw));
	ctxt->ops->put_fpu(ctxt);

	ctxt->dst.val = fcw;

	return X86EMUL_CONTINUE;
}

static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
{
	u16 fsw;

	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
		return emulate_nm(ctxt);

	ctxt->ops->get_fpu(ctxt);
	asm volatile("fnstsw %0": "+m"(fsw));
	ctxt->ops->put_fpu(ctxt);

	ctxt->dst.val = fsw;

	return X86EMUL_CONTINUE;
}
static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
				    struct operand *op)
{
	unsigned reg = ctxt->modrm_reg;

	if (!(ctxt->d & ModRM))
		reg = (ctxt->b & 7) | ((ctxt->rex_prefix & 1) << 3);

	if (ctxt->d & Sse) {
		op->type = OP_XMM;
		op->bytes = 16;
		op->addr.xmm = reg;
		read_sse_reg(ctxt, &op->vec_val, reg);
		return;
	}
	if (ctxt->d & Mmx) {
		op->type = OP_MM;
		op->bytes = 8;
		op->addr.mm = reg & 7;
		return;
	}

	op->type = OP_REG;
	op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
	op->addr.reg = decode_register(ctxt, reg, ctxt->d & ByteOp);

	fetch_register_operand(op);
	op->orig_val = op->val;
}

static void adjust_modrm_seg(struct x86_emulate_ctxt *ctxt, int base_reg)
{
	if (base_reg == VCPU_REGS_RSP || base_reg == VCPU_REGS_RBP)
		ctxt->modrm_seg = VCPU_SREG_SS;
}
static int decode_modrm(struct x86_emulate_ctxt *ctxt,
			struct operand *op)
{
	u8 sib;
	int index_reg, base_reg, scale;
	int rc = X86EMUL_CONTINUE;
	ulong modrm_ea = 0;

	ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
	index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
	base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */

	ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
	ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
	ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
	ctxt->modrm_seg = VCPU_SREG_DS;

	if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
		op->type = OP_REG;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.reg = decode_register(ctxt, ctxt->modrm_rm,
					       ctxt->d & ByteOp);
		if (ctxt->d & Sse) {
			op->type = OP_XMM;
			op->bytes = 16;
			op->addr.xmm = ctxt->modrm_rm;
			read_sse_reg(ctxt, &op->vec_val, ctxt->modrm_rm);
			return rc;
		}
		if (ctxt->d & Mmx) {
			op->type = OP_MM;
			op->bytes = 8;
			op->addr.mm = ctxt->modrm_rm & 7;
			return rc;
		}
		fetch_register_operand(op);
		return rc;
	}

	op->type = OP_MEM;

	if (ctxt->ad_bytes == 2) {
		unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
		unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
		unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
		unsigned di = reg_read(ctxt, VCPU_REGS_RDI);

		/* 16-bit ModR/M decode. */
		switch (ctxt->modrm_mod) {
		case 0:
			if (ctxt->modrm_rm == 6)
				modrm_ea += insn_fetch(u16, ctxt);
			break;
		case 1:
			modrm_ea += insn_fetch(s8, ctxt);
			break;
		case 2:
			modrm_ea += insn_fetch(u16, ctxt);
			break;
		}
		switch (ctxt->modrm_rm) {
		case 0:
			modrm_ea += bx + si;
			break;
		case 1:
			modrm_ea += bx + di;
			break;
		case 2:
			modrm_ea += bp + si;
			break;
		case 3:
			modrm_ea += bp + di;
			break;
		case 4:
			modrm_ea += si;
			break;
		case 5:
			modrm_ea += di;
			break;
		case 6:
			if (ctxt->modrm_mod != 0)
				modrm_ea += bp;
			break;
		case 7:
			modrm_ea += bx;
			break;
		}
		if (ctxt->modrm_rm == 2 || ctxt->modrm_rm == 3 ||
		    (ctxt->modrm_rm == 6 && ctxt->modrm_mod != 0))
			ctxt->modrm_seg = VCPU_SREG_SS;
		modrm_ea = (u16)modrm_ea;
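		/*
		 * Worked example (illustrative, not from the original
		 * source): for "mov dx, [bp+si+0x10]" the ModRM byte is
		 * 0x52: mod=01 -> disp8 follows, reg=010 (DX), rm=010 ->
		 * bp+si. So modrm_ea = bp + si + 0x10, and because rm
		 * uses BP, the default segment becomes SS.
		 */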
	} else {
		/* 32/64-bit ModR/M decode. */
		if ((ctxt->modrm_rm & 7) == 4) {
			sib = insn_fetch(u8, ctxt);
			index_reg |= (sib >> 3) & 7;
			base_reg |= sib & 7;
			scale = sib >> 6;

			if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
				modrm_ea += insn_fetch(s32, ctxt);
			else {
				modrm_ea += reg_read(ctxt, base_reg);
				adjust_modrm_seg(ctxt, base_reg);
				/* Increment ESP on POP [ESP] */
				if ((ctxt->d & IncSP) &&
				    base_reg == VCPU_REGS_RSP)
					modrm_ea += ctxt->op_bytes;
			}
			if (index_reg != 4)
				modrm_ea += reg_read(ctxt, index_reg) << scale;
		} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
			modrm_ea += insn_fetch(s32, ctxt);
			if (ctxt->mode == X86EMUL_MODE_PROT64)
				ctxt->rip_relative = 1;
		} else {
			base_reg = ctxt->modrm_rm;
			modrm_ea += reg_read(ctxt, base_reg);
			adjust_modrm_seg(ctxt, base_reg);
		}
		switch (ctxt->modrm_mod) {
		case 1:
			modrm_ea += insn_fetch(s8, ctxt);
			break;
		case 2:
			modrm_ea += insn_fetch(s32, ctxt);
			break;
		}
	}
	op->addr.mem.ea = modrm_ea;
	if (ctxt->ad_bytes != 8)
		ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;

done:
	return rc;
}
static int decode_abs(struct x86_emulate_ctxt *ctxt,
		      struct operand *op)
{
	int rc = X86EMUL_CONTINUE;

	op->type = OP_MEM;
	switch (ctxt->ad_bytes) {
	case 2:
		op->addr.mem.ea = insn_fetch(u16, ctxt);
		break;
	case 4:
		op->addr.mem.ea = insn_fetch(u32, ctxt);
		break;
	case 8:
		op->addr.mem.ea = insn_fetch(u64, ctxt);
		break;
	}
done:
	return rc;
}
static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
{
	long sv = 0, mask;

	if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
		mask = ~((long)ctxt->dst.bytes * 8 - 1);

		if (ctxt->src.bytes == 2)
			sv = (s16)ctxt->src.val & (s16)mask;
		else if (ctxt->src.bytes == 4)
			sv = (s32)ctxt->src.val & (s32)mask;
		else
			sv = (s64)ctxt->src.val & (s64)mask;

		ctxt->dst.addr.mem.ea = address_mask(ctxt,
					ctxt->dst.addr.mem.ea + (sv >> 3));
	}

	/* only subword offset */
	ctxt->src.val &= (ctxt->dst.bytes << 3) - 1;
}
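/*
 * Illustrative example (not in the original source): for "bt [mem], ax"
 * with a bit offset of 100 in AX and a 16-bit operand, mask is ~15, so
 * sv = 96 and the effective address is advanced by sv >> 3 = 12 bytes;
 * the remaining in-word offset kept in src.val is 100 & 15 = 4.
 */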
static int read_emulated(struct x86_emulate_ctxt *ctxt,
			 unsigned long addr, void *dest, unsigned size)
{
	int rc;
	struct read_cache *mc = &ctxt->mem_read;

	if (mc->pos < mc->end)
		goto read_cached;

	WARN_ON((mc->end + size) >= sizeof(mc->data));

	rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
				      &ctxt->exception);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	mc->end += size;

read_cached:
	memcpy(dest, mc->data + mc->pos, size);
	mc->pos += size;
	return X86EMUL_CONTINUE;
}
static int segmented_read(struct x86_emulate_ctxt *ctxt,
			  struct segmented_address addr,
			  void *data,
			  unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, false, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return read_emulated(ctxt, linear, data, size);
}

static int segmented_write(struct x86_emulate_ctxt *ctxt,
			   struct segmented_address addr,
			   const void *data,
			   unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, true, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->write_emulated(ctxt, linear, data, size,
					 &ctxt->exception);
}

static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt,
			     struct segmented_address addr,
			     const void *orig_data, const void *data,
			     unsigned size)
{
	int rc;
	ulong linear;

	rc = linearize(ctxt, addr, size, true, &linear);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	return ctxt->ops->cmpxchg_emulated(ctxt, linear, orig_data, data,
					   size, &ctxt->exception);
}
static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
			   unsigned int size, unsigned short port,
			   void *dest)
{
	struct read_cache *rc = &ctxt->io_read;

	if (rc->pos == rc->end) { /* refill pio read ahead */
		unsigned int in_page, n;
		unsigned int count = ctxt->rep_prefix ?
			address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
		in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
			offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
			PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
		n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
		if (n == 0)
			n = 1;
		rc->pos = rc->end = 0;
		if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n))
			return 0;
		rc->end = n * size;
	}

	if (ctxt->rep_prefix && (ctxt->d & String) &&
	    !(ctxt->eflags & X86_EFLAGS_DF)) {
		ctxt->dst.data = rc->data + rc->pos;
		ctxt->dst.type = OP_MEM_STR;
		ctxt->dst.count = (rc->end - rc->pos) / size;
		rc->pos = rc->end;
	} else {
		memcpy(dest, rc->data + rc->pos, size);
		rc->pos += size;
	}
	return 1;
}
static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
				     u16 index, struct desc_struct *desc)
{
	struct desc_ptr dt;
	ulong addr;

	ctxt->ops->get_idt(ctxt, &dt);

	if (dt.size < index * 8 + 7)
		return emulate_gp(ctxt, index << 3 | 0x2);

	addr = dt.address + index * 8;
	return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
				   &ctxt->exception);
}
static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
				     u16 selector, struct desc_ptr *dt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	u32 base3 = 0;

	if (selector & 1 << 2) {
		struct desc_struct desc;
		u16 sel;

		memset(dt, 0, sizeof *dt);
		if (!ops->get_segment(ctxt, &sel, &desc, &base3,
				      VCPU_SREG_LDTR))
			return;

		dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
		dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
	} else
		ops->get_gdt(ctxt, dt);
}
static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
			      u16 selector, ulong *desc_addr_p)
{
	struct desc_ptr dt;
	u16 index = selector >> 3;
	ulong addr;

	get_descriptor_table_ptr(ctxt, selector, &dt);

	if (dt.size < index * 8 + 7)
		return emulate_gp(ctxt, selector & 0xfffc);

	addr = dt.address + index * 8;

#ifdef CONFIG_X86_64
	if (addr >> 32 != 0) {
		u64 efer = 0;

		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
		if (!(efer & EFER_LMA))
			addr &= (u32)-1;
	}
#endif

	*desc_addr_p = addr;
	return X86EMUL_CONTINUE;
}
/* only valid for 8-byte segment descriptors */
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   u16 selector, struct desc_struct *desc,
				   ulong *desc_addr_p)
{
	int rc;

	rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	return ctxt->ops->read_std(ctxt, *desc_addr_p, desc, sizeof(*desc),
				   &ctxt->exception);
}

/* only valid for 8-byte segment descriptors */
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				    u16 selector, struct desc_struct *desc)
{
	int rc;
	ulong addr;

	rc = get_descriptor_ptr(ctxt, selector, &addr);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc,
				    &ctxt->exception);
}
/* Does not support long mode */
static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				     u16 selector, int seg, u8 cpl,
				     enum x86_transfer_type transfer,
				     struct desc_struct *desc)
{
	struct desc_struct seg_desc, old_desc;
	u8 dpl, rpl;
	unsigned err_vec = GP_VECTOR;
	u32 err_code = 0;
	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
	ulong desc_addr;
	int ret;
	u16 dummy;
	u32 base3 = 0;

	memset(&seg_desc, 0, sizeof seg_desc);

	if (ctxt->mode == X86EMUL_MODE_REAL) {
		/* set real mode segment descriptor (keep limit etc. for
		 * unreal mode) */
		ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
		set_desc_base(&seg_desc, selector << 4);
		goto load;
	} else if (seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) {
		/* VM86 needs a clean new segment descriptor */
		set_desc_base(&seg_desc, selector << 4);
		set_desc_limit(&seg_desc, 0xffff);
		seg_desc.type = 3;
		seg_desc.p = 1;
		seg_desc.s = 1;
		seg_desc.dpl = 3;
		goto load;
	}

	rpl = selector & 3;

	/* NULL selector is not valid for TR, CS and SS (except for long mode) */
	if ((seg == VCPU_SREG_CS
	     || (seg == VCPU_SREG_SS
		 && (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl))
	     || seg == VCPU_SREG_TR)
	    && null_selector)
		goto exception;

	/* TR should be in GDT only */
	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
		goto exception;

	if (null_selector) /* for NULL selector skip all following checks */
		goto load;
	ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	err_code = selector & 0xfffc;
	err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
							   GP_VECTOR;

	/* can't load system descriptor into segment selector */
	if (seg <= VCPU_SREG_GS && !seg_desc.s) {
		if (transfer == X86_TRANSFER_CALL_JMP)
			return X86EMUL_UNHANDLEABLE;
		goto exception;
	}

	if (!seg_desc.p) {
		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
		goto exception;
	}

	dpl = seg_desc.dpl;

	switch (seg) {
	case VCPU_SREG_SS:
		/*
		 * segment is not a writable data segment, or segment
		 * selector's RPL != CPL, or segment's DPL != CPL
		 */
		if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
			goto exception;
		break;
	case VCPU_SREG_CS:
		if (!(seg_desc.type & 8))
			goto exception;

		if (seg_desc.type & 4) {
			/* conforming */
			if (dpl > cpl)
				goto exception;
		} else {
			/* nonconforming */
			if (rpl > cpl || dpl != cpl)
				goto exception;
		}
		/* in long-mode d/b must be clear if l is set */
		if (seg_desc.d && seg_desc.l) {
			u64 efer = 0;

			ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
			if (efer & EFER_LMA)
				goto exception;
		}

		/* CS(RPL) <- CPL */
		selector = (selector & 0xfffc) | cpl;
		break;
	case VCPU_SREG_TR:
		if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
			goto exception;
		old_desc = seg_desc;
		seg_desc.type |= 2; /* busy */
		ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
						  sizeof(seg_desc), &ctxt->exception);
		if (ret != X86EMUL_CONTINUE)
			return ret;
		break;
	case VCPU_SREG_LDTR:
		if (seg_desc.s || seg_desc.type != 2)
			goto exception;
		break;
	default: /* DS, ES, FS, or GS */
		/*
		 * segment is not a data or readable code segment or
		 * ((segment is a data or nonconforming code segment)
		 * and (both RPL and CPL > DPL))
		 */
		if ((seg_desc.type & 0xa) == 0x8 ||
		    (((seg_desc.type & 0xc) != 0xc) &&
		     (rpl > dpl && cpl > dpl)))
			goto exception;
		break;
	}

	if (seg_desc.s) {
		/* mark segment as accessed */
		if (!(seg_desc.type & 1)) {
			seg_desc.type |= 1;
			ret = write_segment_descriptor(ctxt, selector,
						       &seg_desc);
			if (ret != X86EMUL_CONTINUE)
				return ret;
		}
	} else if (ctxt->mode == X86EMUL_MODE_PROT64) {
		ret = ctxt->ops->read_std(ctxt, desc_addr + 8, &base3,
					  sizeof(base3), &ctxt->exception);
		if (ret != X86EMUL_CONTINUE)
			return ret;
		if (is_noncanonical_address(get_desc_base(&seg_desc) |
					    ((u64)base3 << 32)))
			return emulate_gp(ctxt, 0);
	}
load:
	ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
	if (desc)
		*desc = seg_desc;
	return X86EMUL_CONTINUE;
exception:
	return emulate_exception(ctxt, err_vec, err_code, true);
}
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   u16 selector, int seg)
{
	u8 cpl = ctxt->ops->cpl(ctxt);

	return __load_segment_descriptor(ctxt, selector, seg, cpl,
					 X86_TRANSFER_NONE, NULL);
}

static void write_register_operand(struct operand *op)
{
	return assign_register(op->addr.reg, op->val, op->bytes);
}

static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
{
	switch (op->type) {
	case OP_REG:
		write_register_operand(op);
		break;
	case OP_MEM:
		if (ctxt->lock_prefix)
			return segmented_cmpxchg(ctxt,
						 op->addr.mem,
						 &op->orig_val,
						 &op->val,
						 op->bytes);
		else
			return segmented_write(ctxt,
					       op->addr.mem,
					       &op->val,
					       op->bytes);
		break;
	case OP_MEM_STR:
		return segmented_write(ctxt,
				       op->addr.mem,
				       op->data,
				       op->bytes * op->count);
		break;
	case OP_XMM:
		write_sse_reg(ctxt, &op->vec_val, op->addr.xmm);
		break;
	case OP_MM:
		write_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
		break;
	case OP_NONE:
		/* no writeback */
		break;
	default:
		break;
	}
	return X86EMUL_CONTINUE;
}
static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
{
	struct segmented_address addr;

	rsp_increment(ctxt, -bytes);
	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
	addr.seg = VCPU_SREG_SS;

	return segmented_write(ctxt, addr, data, bytes);
}

static int em_push(struct x86_emulate_ctxt *ctxt)
{
	/* Disable writeback. */
	ctxt->dst.type = OP_NONE;
	return push(ctxt, &ctxt->src.val, ctxt->op_bytes);
}

static int emulate_pop(struct x86_emulate_ctxt *ctxt,
		       void *dest, int len)
{
	int rc;
	struct segmented_address addr;

	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
	addr.seg = VCPU_SREG_SS;
	rc = segmented_read(ctxt, addr, dest, len);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rsp_increment(ctxt, len);
	return rc;
}

static int em_pop(struct x86_emulate_ctxt *ctxt)
{
	return emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
}
static int emulate_popf(struct x86_emulate_ctxt *ctxt,
			void *dest, int len)
{
	int rc;
	unsigned long val, change_mask;
	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
	int cpl = ctxt->ops->cpl(ctxt);

	rc = emulate_pop(ctxt, &val, len);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
		      X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
		      X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
		      X86_EFLAGS_AC | X86_EFLAGS_ID;

	switch(ctxt->mode) {
	case X86EMUL_MODE_PROT64:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT16:
		if (cpl == 0)
			change_mask |= X86_EFLAGS_IOPL;
		if (cpl <= iopl)
			change_mask |= X86_EFLAGS_IF;
		break;
	case X86EMUL_MODE_VM86:
		if (iopl < 3)
			return emulate_gp(ctxt, 0);
		change_mask |= X86_EFLAGS_IF;
		break;
	default: /* real mode */
		change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
		break;
	}

	*(unsigned long *)dest =
		(ctxt->eflags & ~change_mask) | (val & change_mask);

	return rc;
}

static int em_popf(struct x86_emulate_ctxt *ctxt)
{
	ctxt->dst.type = OP_REG;
	ctxt->dst.addr.reg = &ctxt->eflags;
	ctxt->dst.bytes = ctxt->op_bytes;
	return emulate_popf(ctxt, &ctxt->dst.val, ctxt->op_bytes);
}
static int em_enter(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned frame_size = ctxt->src.val;
	unsigned nesting_level = ctxt->src2.val & 31;
	ulong rbp;

	if (nesting_level)
		return X86EMUL_UNHANDLEABLE;

	rbp = reg_read(ctxt, VCPU_REGS_RBP);
	rc = push(ctxt, &rbp, stack_size(ctxt));
	if (rc != X86EMUL_CONTINUE)
		return rc;
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
		      stack_mask(ctxt));
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
		      reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
		      stack_mask(ctxt));
	return X86EMUL_CONTINUE;
}

static int em_leave(struct x86_emulate_ctxt *ctxt)
{
	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
		      stack_mask(ctxt));
	return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
}
static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
{
	int seg = ctxt->src2.val;

	ctxt->src.val = get_segment_selector(ctxt, seg);
	if (ctxt->op_bytes == 4) {
		rsp_increment(ctxt, -2);
		ctxt->op_bytes = 2;
	}

	return em_push(ctxt);
}

static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
{
	int seg = ctxt->src2.val;
	unsigned long selector;
	int rc;

	rc = emulate_pop(ctxt, &selector, 2);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	if (ctxt->modrm_reg == VCPU_SREG_SS)
		ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
	if (ctxt->op_bytes > 2)
		rsp_increment(ctxt, ctxt->op_bytes - 2);

	rc = load_segment_descriptor(ctxt, (u16)selector, seg);
	return rc;
}
static int em_pusha(struct x86_emulate_ctxt *ctxt)
{
	unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RAX;

	while (reg <= VCPU_REGS_RDI) {
		(reg == VCPU_REGS_RSP) ?
		(ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));

		rc = em_push(ctxt);
		if (rc != X86EMUL_CONTINUE)
			return rc;

		++reg;
	}

	return rc;
}

static int em_pushf(struct x86_emulate_ctxt *ctxt)
{
	ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
	return em_push(ctxt);
}

static int em_popa(struct x86_emulate_ctxt *ctxt)
{
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RDI;
	u32 val;

	while (reg >= VCPU_REGS_RAX) {
		if (reg == VCPU_REGS_RSP) {
			rsp_increment(ctxt, ctxt->op_bytes);
			--reg;
			continue;
		}

		rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			break;
		assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
		--reg;
	}
	return rc;
}
static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	int rc;
	struct desc_ptr dt;
	gva_t cs_addr;
	gva_t eip_addr;
	u16 cs, eip;

	/* TODO: Add limit checks */
	ctxt->src.val = ctxt->eflags;
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);

	ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->src.val = ctxt->_eip;
	rc = em_push(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ops->get_idt(ctxt, &dt);

	eip_addr = dt.address + (irq << 2);
	cs_addr = dt.address + (irq << 2) + 2;

	rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->_eip = eip;

	return rc;
}

int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
	int rc;

	invalidate_registers(ctxt);
	rc = __emulate_int_real(ctxt, irq);
	if (rc == X86EMUL_CONTINUE)
		writeback_registers(ctxt);
	return rc;
}
static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
{
	switch(ctxt->mode) {
	case X86EMUL_MODE_REAL:
		return __emulate_int_real(ctxt, irq);
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT64:
	default:
		/* Protected-mode interrupts are not implemented yet. */
		return X86EMUL_UNHANDLEABLE;
	}
}
static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
{
	int rc = X86EMUL_CONTINUE;
	unsigned long temp_eip = 0;
	unsigned long temp_eflags = 0;
	unsigned long cs = 0;
	unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
			     X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
			     X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
			     X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
			     X86_EFLAGS_AC | X86_EFLAGS_ID |
			     X86_EFLAGS_FIXED;
	unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
				  X86_EFLAGS_VIP;

	/* TODO: Add stack limit check */

	rc = emulate_pop(ctxt, &temp_eip, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	if (temp_eip & ~0xffff)
		return emulate_gp(ctxt, 0);

	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = emulate_pop(ctxt, &temp_eflags, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->_eip = temp_eip;

	if (ctxt->op_bytes == 4)
		ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
	else if (ctxt->op_bytes == 2) {
		ctxt->eflags &= ~0xffff;
		ctxt->eflags |= temp_eflags;
	}

	ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
	ctxt->eflags |= X86_EFLAGS_FIXED;
	ctxt->ops->set_nmi_mask(ctxt, false);

	return rc;
}
static int em_iret(struct x86_emulate_ctxt *ctxt)
{
	switch(ctxt->mode) {
	case X86EMUL_MODE_REAL:
		return emulate_iret_real(ctxt);
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT64:
	default:
		/* IRET from protected mode is not implemented yet. */
		return X86EMUL_UNHANDLEABLE;
	}
}
static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned short sel;
	struct desc_struct new_desc;
	u8 cpl = ctxt->ops->cpl(ctxt);

	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);

	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
				       X86_TRANSFER_CALL_JMP,
				       &new_desc);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
	/* Error handling is not implemented. */
	if (rc != X86EMUL_CONTINUE)
		return X86EMUL_UNHANDLEABLE;

	return rc;
}

static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
{
	return assign_eip_near(ctxt, ctxt->src.val);
}

static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	long int old_eip;

	old_eip = ctxt->_eip;
	rc = assign_eip_near(ctxt, ctxt->src.val);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	ctxt->src.val = old_eip;
	rc = em_push(ctxt);
	return rc;
}
static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
{
	u64 old = ctxt->dst.orig_val64;

	if (ctxt->dst.bytes == 16)
		return X86EMUL_UNHANDLEABLE;

	if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
	    ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
		*reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
		*reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
		ctxt->eflags &= ~X86_EFLAGS_ZF;
	} else {
		ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
			(u32) reg_read(ctxt, VCPU_REGS_RBX);

		ctxt->eflags |= X86_EFLAGS_ZF;
	}
	return X86EMUL_CONTINUE;
}
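/*
 * Illustrative summary (not from the original source): CMPXCHG8B compares
 * EDX:EAX with the 64-bit memory operand. On a match it stores ECX:EBX to
 * memory and sets ZF; on a mismatch it loads the memory value into EDX:EAX
 * and clears ZF. The emulation above mirrors exactly that.
 */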
static int em_ret(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned long eip;

	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	return assign_eip_near(ctxt, eip);
}

static int em_ret_far(struct x86_emulate_ctxt *ctxt)
{
	int rc;
	unsigned long eip, cs;
	int cpl = ctxt->ops->cpl(ctxt);
	struct desc_struct new_desc;

	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	/* Outer-privilege level return is not implemented */
	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
		return X86EMUL_UNHANDLEABLE;
	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
				       X86_TRANSFER_RET,
				       &new_desc);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = assign_eip_far(ctxt, eip, &new_desc);
	/* Error handling is not implemented. */
	if (rc != X86EMUL_CONTINUE)
		return X86EMUL_UNHANDLEABLE;

	return rc;
}

static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
{
	int rc;

	rc = em_ret_far(ctxt);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rsp_increment(ctxt, ctxt->src.val);
	return X86EMUL_CONTINUE;
}
static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
{
	/* Save real source value, then compare EAX against destination. */
	ctxt->dst.orig_val = ctxt->dst.val;
	ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
	ctxt->src.orig_val = ctxt->src.val;
	ctxt->src.val = ctxt->dst.orig_val;
	fastop(ctxt, em_cmp);

	if (ctxt->eflags & X86_EFLAGS_ZF) {
		/* Success: write back to memory; no update of EAX */
		ctxt->src.type = OP_NONE;
		ctxt->dst.val = ctxt->src.orig_val;
	} else {
		/* Failure: write the value we saw to EAX. */
		ctxt->src.type = OP_REG;
		ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
		ctxt->src.val = ctxt->dst.orig_val;
		/* Create write-cycle to dest by writing the same value */
		ctxt->dst.val = ctxt->dst.orig_val;
	}
	return X86EMUL_CONTINUE;
}
static int em_lseg(struct x86_emulate_ctxt *ctxt)
{
	int seg = ctxt->src2.val;
	unsigned short sel;
	int rc;

	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);

	rc = load_segment_descriptor(ctxt, sel, seg);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->dst.val = ctxt->src.val;
	return rc;
}

static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
{
	u32 eax, ebx, ecx, edx;

	eax = 0x80000001;
	ecx = 0;
	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
	return edx & bit(X86_FEATURE_LM);
}
#define GET_SMSTATE(type, smbase, offset) \
	({ \
	 type __val; \
	 int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val, \
				      sizeof(__val)); \
	 if (r != X86EMUL_CONTINUE) \
		 return X86EMUL_UNHANDLEABLE; \
	 __val; \
	})
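/*
 * Usage sketch (illustrative, not part of the original excerpt): the macro
 * is a statement expression that yields __val, so callers read SMM save
 * state fields directly, e.g.:
 *
 *   ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
 *
 * and any failed read_phys() makes the *calling* function return
 * X86EMUL_UNHANDLEABLE.
 */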
static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
{
	desc->g    = (flags >> 23) & 1;
	desc->d    = (flags >> 22) & 1;
	desc->l    = (flags >> 21) & 1;
	desc->avl  = (flags >> 20) & 1;
	desc->p    = (flags >> 15) & 1;
	desc->dpl  = (flags >> 13) & 3;
	desc->s    = (flags >> 12) & 1;
	desc->type = (flags >>  8) & 15;
}
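/*
 * Note (added for clarity): the flags word here follows the bit layout of
 * a segment descriptor's high dword (type at bits 8-11, S/DPL/P at 12-15,
 * AVL/L/D/G at 20-23), which is why the 64-bit loader below shifts the
 * 16-bit attribute field left by 8 before passing it in.
 */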
static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
{
	struct desc_struct desc;
	int offset;
	u16 selector;

	selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4);

	if (n < 3)
		offset = 0x7f84 + n * 12;
	else
		offset = 0x7f2c + (n - 3) * 12;

	set_desc_base(&desc,      GET_SMSTATE(u32, smbase, offset + 8));
	set_desc_limit(&desc,     GET_SMSTATE(u32, smbase, offset + 4));
	rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset));
	ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
	return X86EMUL_CONTINUE;
}

static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
{
	struct desc_struct desc;
	int offset;
	u16 selector;
	u32 base3;

	offset = 0x7e00 + n * 16;

	selector =                GET_SMSTATE(u16, smbase, offset);
	rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8);
	set_desc_limit(&desc,     GET_SMSTATE(u32, smbase, offset + 4));
	set_desc_base(&desc,      GET_SMSTATE(u32, smbase, offset + 8));
	base3 =                   GET_SMSTATE(u32, smbase, offset + 12);

	ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
	return X86EMUL_CONTINUE;
}
static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
				    u64 cr0, u64 cr4)
{
	int bad;

	/*
	 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
	 * Then enable protected mode. However, PCID cannot be enabled
	 * if EFER.LMA=0, so set it separately.
	 */
	bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	bad = ctxt->ops->set_cr(ctxt, 0, cr0);
	if (bad)
		return X86EMUL_UNHANDLEABLE;

	if (cr4 & X86_CR4_PCIDE) {
		bad = ctxt->ops->set_cr(ctxt, 4, cr4);
		if (bad)
			return X86EMUL_UNHANDLEABLE;
	}

	return X86EMUL_CONTINUE;
}
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
{
	struct desc_struct desc;
	struct desc_ptr dt;
	u16 selector;
	u32 val, cr0, cr4;
	int i;

	cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
	ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
	ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
	ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);

	for (i = 0; i < 8; i++)
		*reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4);

	val = GET_SMSTATE(u32, smbase, 0x7fcc);
	ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
	val = GET_SMSTATE(u32, smbase, 0x7fc8);
	ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);

	selector = GET_SMSTATE(u32, smbase, 0x7fc4);
	set_desc_base(&desc,      GET_SMSTATE(u32, smbase, 0x7f64));
	set_desc_limit(&desc,     GET_SMSTATE(u32, smbase, 0x7f60));
	rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f5c));
	ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);

	selector = GET_SMSTATE(u32, smbase, 0x7fc0);
	set_desc_base(&desc,      GET_SMSTATE(u32, smbase, 0x7f80));
	set_desc_limit(&desc,     GET_SMSTATE(u32, smbase, 0x7f7c));
	rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f78));
	ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);

	dt.address = GET_SMSTATE(u32, smbase, 0x7f74);
	dt.size = GET_SMSTATE(u32, smbase, 0x7f70);
	ctxt->ops->set_gdt(ctxt, &dt);

	dt.address = GET_SMSTATE(u32, smbase, 0x7f58);
	dt.size = GET_SMSTATE(u32, smbase, 0x7f54);
	ctxt->ops->set_idt(ctxt, &dt);

	for (i = 0; i < 6; i++) {
		int r = rsm_load_seg_32(ctxt, smbase, i);
		if (r != X86EMUL_CONTINUE)
			return r;
	}

	cr4 = GET_SMSTATE(u32, smbase, 0x7f14);

	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));

	return rsm_enter_protected_mode(ctxt, cr0, cr4);
}
2412 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
2414 struct desc_struct desc;
2421 for (i = 0; i < 16; i++)
2422 *reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);
2424 ctxt->_eip = GET_SMSTATE(u64, smbase, 0x7f78);
2425 ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED;
2427 val = GET_SMSTATE(u32, smbase, 0x7f68);
2428 ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
2429 val = GET_SMSTATE(u32, smbase, 0x7f60);
2430 ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
2432 cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
2433 ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50));
2434 cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
2435 ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
2436 val = GET_SMSTATE(u64, smbase, 0x7ed0);
2437 ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);
2439 selector = GET_SMSTATE(u32, smbase, 0x7e90);
2440 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e92) << 8);
2441 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e94));
2442 set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e98));
2443 base3 = GET_SMSTATE(u32, smbase, 0x7e9c);
2444 ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
2446 dt.size = GET_SMSTATE(u32, smbase, 0x7e84);
2447 dt.address = GET_SMSTATE(u64, smbase, 0x7e88);
2448 ctxt->ops->set_idt(ctxt, &dt);
2450 selector = GET_SMSTATE(u32, smbase, 0x7e70);
2451 rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e72) << 8);
2452 set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e74));
2453 set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e78));
2454 base3 = GET_SMSTATE(u32, smbase, 0x7e7c);
2455 ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
2457 dt.size = GET_SMSTATE(u32, smbase, 0x7e64);
2458 dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
2459 ctxt->ops->set_gdt(ctxt, &dt);
2461 r = rsm_enter_protected_mode(ctxt, cr0, cr4);
2462 if (r != X86EMUL_CONTINUE)
2463 return r;
2465 for (i = 0; i < 6; i++) {
2466 r = rsm_load_seg_64(ctxt, smbase, i);
2467 if (r != X86EMUL_CONTINUE)
2468 return r;
2469 }
2471 return X86EMUL_CONTINUE;
2474 static int em_rsm(struct x86_emulate_ctxt *ctxt)
2476 unsigned long cr0, cr4, efer;
2480 if ((ctxt->emul_flags & X86EMUL_SMM_MASK) == 0)
2481 return emulate_ud(ctxt);
2484 * Get back to real mode, to prepare a safe state in which to load
2485 * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU
2486 * supports long mode.
2488 cr4 = ctxt->ops->get_cr(ctxt, 4);
2489 if (emulator_has_longmode(ctxt)) {
2490 struct desc_struct cs_desc;
2492 /* Zero CR4.PCIDE before CR0.PG. */
2493 if (cr4 & X86_CR4_PCIDE) {
2494 ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
2495 cr4 &= ~X86_CR4_PCIDE;
2498 /* A 32-bit code segment is required to clear EFER.LMA. */
2499 memset(&cs_desc, 0, sizeof(cs_desc));
2500 cs_desc.type = 0xb;
2501 cs_desc.s = cs_desc.g = cs_desc.p = 1;
2502 ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
2505 /* For the 64-bit case, this will clear EFER.LMA. */
2506 cr0 = ctxt->ops->get_cr(ctxt, 0);
2507 if (cr0 & X86_CR0_PE)
2508 ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
2510 /* Now clear CR4.PAE (which must be done before clearing EFER.LME). */
2511 if (cr4 & X86_CR4_PAE)
2512 ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
2514 /* And finally go back to 32-bit mode. */
2515 efer = 0;
2516 ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
2518 smbase = ctxt->ops->get_smbase(ctxt);
2519 if (emulator_has_longmode(ctxt))
2520 ret = rsm_load_state_64(ctxt, smbase + 0x8000);
2521 else
2522 ret = rsm_load_state_32(ctxt, smbase + 0x8000);
2524 if (ret != X86EMUL_CONTINUE) {
2525 /* FIXME: should triple fault */
2526 return X86EMUL_UNHANDLEABLE;
2529 if ((ctxt->emul_flags & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
2530 ctxt->ops->set_nmi_mask(ctxt, false);
2532 ctxt->emul_flags &= ~X86EMUL_SMM_INSIDE_NMI_MASK;
2533 ctxt->emul_flags &= ~X86EMUL_SMM_MASK;
2534 return X86EMUL_CONTINUE;
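/*
 * Summary of the RSM sequence above: force a sane pre-protected-mode
 * state (clear CR4.PCIDE, install a 32-bit CS, clear CR0.PG/PE,
 * CR4.PAE and EFER, in that order), reload the saved state from SMRAM
 * at smbase + 0x8000, and drop the NMI mask unless the SMI was taken
 * while an NMI handler was running.
 */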
2538 setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
2539 struct desc_struct *cs, struct desc_struct *ss)
2541 cs->l = 0; /* will be adjusted later */
2542 set_desc_base(cs, 0); /* flat segment */
2543 cs->g = 1; /* 4kb granularity */
2544 set_desc_limit(cs, 0xfffff); /* 4GB limit */
2545 cs->type = 0x0b; /* Read, Execute, Accessed */
2546 cs->s = 1;
2547 cs->dpl = 0; /* will be adjusted later */
2548 cs->p = 1;
2549 cs->d = 1;
2550 cs->avl = 0;
2552 set_desc_base(ss, 0); /* flat segment */
2553 set_desc_limit(ss, 0xfffff); /* 4GB limit */
2554 ss->g = 1; /* 4kb granularity */
2555 ss->s = 1;
2556 ss->type = 0x03; /* Read/Write, Accessed */
2557 ss->d = 1; /* 32bit stack segment */
2558 ss->dpl = 0;
2559 ss->p = 1;
2560 ss->l = 0;
2561 ss->avl = 0;
2564 static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
2566 u32 eax, ebx, ecx, edx;
2569 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
2570 return ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx
2571 && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx
2572 && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx;
2575 static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
2577 const struct x86_emulate_ops *ops = ctxt->ops;
2578 u32 eax, ebx, ecx, edx;
2581 * syscall should always be enabled in long mode - so only become
2582 * vendor-specific (via CPUID) if other modes are active...
2584 if (ctxt->mode == X86EMUL_MODE_PROT64)
2589 ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
2591 * Intel ("GenuineIntel")
2592 * Remark: Intel CPUs only support "syscall" in 64-bit long
2593 * mode; a 64-bit guest running a 32-bit compat application
2594 * will #UD. While this behaviour could be emulated away by
2595 * giving the AMD response, AMD CPUs cannot be made to behave
2596 * like Intel ones, so keep the vendor check.
2598 if (ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx &&
2599 ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx &&
2600 edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx)
2603 /* AMD ("AuthenticAMD") */
2604 if (ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx &&
2605 ecx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx &&
2606 edx == X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
2609 /* AMD ("AMDisbetter!") */
2610 if (ebx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ebx &&
2611 ecx == X86EMUL_CPUID_VENDOR_AMDisbetterI_ecx &&
2612 edx == X86EMUL_CPUID_VENDOR_AMDisbetterI_edx)
2615 /* default: (not Intel, not AMD), apply Intel's stricter rules... */
2619 static int em_syscall(struct x86_emulate_ctxt *ctxt)
2621 const struct x86_emulate_ops *ops = ctxt->ops;
2622 struct desc_struct cs, ss;
2623 u64 msr_data;
2624 u16 cs_sel, ss_sel;
2625 u64 efer = 0;
2627 /* syscall is not available in real mode */
2628 if (ctxt->mode == X86EMUL_MODE_REAL ||
2629 ctxt->mode == X86EMUL_MODE_VM86)
2630 return emulate_ud(ctxt);
2632 if (!(em_syscall_is_enabled(ctxt)))
2633 return emulate_ud(ctxt);
2635 ops->get_msr(ctxt, MSR_EFER, &efer);
2636 setup_syscalls_segments(ctxt, &cs, &ss);
2638 if (!(efer & EFER_SCE))
2639 return emulate_ud(ctxt);
2641 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2642 msr_data >>= 32;
2643 cs_sel = (u16)(msr_data & 0xfffc);
2644 ss_sel = (u16)(msr_data + 8);
2646 if (efer & EFER_LMA) {
2647 cs.d = 0;
2648 cs.l = 1;
2649 }
2650 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2651 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2653 *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
2654 if (efer & EFER_LMA) {
2655 #ifdef CONFIG_X86_64
2656 *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
2658 ops->get_msr(ctxt,
2659 ctxt->mode == X86EMUL_MODE_PROT64 ?
2660 MSR_LSTAR : MSR_CSTAR, &msr_data);
2661 ctxt->_eip = msr_data;
2663 ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
2664 ctxt->eflags &= ~msr_data;
2665 ctxt->eflags |= X86_EFLAGS_FIXED;
2666 #endif
2667 } else {
2668 /* legacy mode */
2669 ops->get_msr(ctxt, MSR_STAR, &msr_data);
2670 ctxt->_eip = (u32)msr_data;
2672 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2675 return X86EMUL_CONTINUE;
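/*
 * Selector arithmetic used above, per the SYSCALL definition: the new
 * CS selector comes from STAR[47:32] (with the RPL bits forced clear
 * by the 0xfffc mask) and SS is that value plus 8.  E.g. with
 * MSR_STAR = 0x0023001000000000ULL the kernel runs with CS = 0x10
 * and SS = 0x18.
 */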
2678 static int em_sysenter(struct x86_emulate_ctxt *ctxt)
2680 const struct x86_emulate_ops *ops = ctxt->ops;
2681 struct desc_struct cs, ss;
2682 u64 msr_data;
2683 u16 cs_sel, ss_sel;
2684 u64 efer = 0;
2686 ops->get_msr(ctxt, MSR_EFER, &efer);
2687 /* inject #GP if in real mode */
2688 if (ctxt->mode == X86EMUL_MODE_REAL)
2689 return emulate_gp(ctxt, 0);
2692 * Not recognized on AMD in compat mode (but is recognized in legacy
2693 * mode).
2695 if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
2696 && !vendor_intel(ctxt))
2697 return emulate_ud(ctxt);
2699 /* sysenter/sysexit have not been tested in 64bit mode. */
2700 if (ctxt->mode == X86EMUL_MODE_PROT64)
2701 return X86EMUL_UNHANDLEABLE;
2703 setup_syscalls_segments(ctxt, &cs, &ss);
2705 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2706 if ((msr_data & 0xfffc) == 0x0)
2707 return emulate_gp(ctxt, 0);
2709 ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
2710 cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
2711 ss_sel = cs_sel + 8;
2712 if (efer & EFER_LMA) {
2713 cs.d = 0;
2714 cs.l = 1;
2715 }
2717 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2718 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2720 ops->get_msr(ctxt, MSR_IA32_SYSENTER_EIP, &msr_data);
2721 ctxt->_eip = (efer & EFER_LMA) ? msr_data : (u32)msr_data;
2723 ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
2724 *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
2725 (u32)msr_data;
2727 return X86EMUL_CONTINUE;
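/*
 * SYSENTER loads flat segments derived from IA32_SYSENTER_CS: CS is
 * the MSR value with the RPL bits cleared, SS is CS + 8, and EIP/ESP
 * come from IA32_SYSENTER_EIP/ESP (truncated to 32 bits outside long
 * mode, as above).  A null SYSENTER_CS selector raises #GP(0).
 */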
2730 static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2732 const struct x86_emulate_ops *ops = ctxt->ops;
2733 struct desc_struct cs, ss;
2734 u64 msr_data, rcx, rdx;
2735 int usermode;
2736 u16 cs_sel = 0, ss_sel = 0;
2738 /* inject #GP if in real mode or Virtual 8086 mode */
2739 if (ctxt->mode == X86EMUL_MODE_REAL ||
2740 ctxt->mode == X86EMUL_MODE_VM86)
2741 return emulate_gp(ctxt, 0);
2743 setup_syscalls_segments(ctxt, &cs, &ss);
2745 if ((ctxt->rex_prefix & 0x8) != 0x0)
2746 usermode = X86EMUL_MODE_PROT64;
2748 usermode = X86EMUL_MODE_PROT32;
2750 rcx = reg_read(ctxt, VCPU_REGS_RCX);
2751 rdx = reg_read(ctxt, VCPU_REGS_RDX);
2755 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
2756 switch (usermode) {
2757 case X86EMUL_MODE_PROT32:
2758 cs_sel = (u16)(msr_data + 16);
2759 if ((msr_data & 0xfffc) == 0x0)
2760 return emulate_gp(ctxt, 0);
2761 ss_sel = (u16)(msr_data + 24);
2762 rcx = (u32)rcx;
2763 rdx = (u32)rdx;
2764 break;
2765 case X86EMUL_MODE_PROT64:
2766 cs_sel = (u16)(msr_data + 32);
2767 if (msr_data == 0x0)
2768 return emulate_gp(ctxt, 0);
2769 ss_sel = cs_sel + 8;
2770 cs.d = 0;
2771 cs.l = 1;
2772 if (is_noncanonical_address(rcx) ||
2773 is_noncanonical_address(rdx))
2774 return emulate_gp(ctxt, 0);
2777 cs_sel |= SEGMENT_RPL_MASK;
2778 ss_sel |= SEGMENT_RPL_MASK;
2780 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2781 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2783 ctxt->_eip = rdx;
2784 *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2786 return X86EMUL_CONTINUE;
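/*
 * SYSEXIT mirrors SYSENTER: the user CS/SS are at fixed offsets from
 * IA32_SYSENTER_CS (+16/+24 for a 32-bit return, +32/+40 for a 64-bit
 * one, SS always being CS + 8), the return RIP comes from RDX and the
 * stack pointer from RCX; both must be canonical in the 64-bit case.
 */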
2789 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
2792 if (ctxt->mode == X86EMUL_MODE_REAL)
2794 if (ctxt->mode == X86EMUL_MODE_VM86)
2796 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
2797 return ctxt->ops->cpl(ctxt) > iopl;
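/*
 * A "bad" IOPL does not mean the access faults outright: in real mode
 * port I/O is always allowed, in VM86 mode the TSS I/O permission
 * bitmap is always consulted, and in protected mode the bitmap is
 * consulted only when CPL > IOPL.  Only a bitmap denial (below)
 * actually raises #GP.
 */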
2800 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
2803 const struct x86_emulate_ops *ops = ctxt->ops;
2804 struct desc_struct tr_seg;
2807 u16 tr, io_bitmap_ptr, perm, bit_idx = port & 0x7;
2808 unsigned mask = (1 << len) - 1;
2811 ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
2814 if (desc_limit_scaled(&tr_seg) < 103)
2816 base = get_desc_base(&tr_seg);
2817 #ifdef CONFIG_X86_64
2818 base |= ((u64)base3) << 32;
2820 r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL);
2821 if (r != X86EMUL_CONTINUE)
2822 return false;
2823 if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
2824 return false;
2825 r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL);
2826 if (r != X86EMUL_CONTINUE)
2827 return false;
2828 if ((perm >> bit_idx) & mask)
2829 return false;
2831 return true;
2833 static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
2839 if (emulator_bad_iopl(ctxt))
2840 if (!emulator_io_port_access_allowed(ctxt, port, len))
2841 return false;
2843 ctxt->perm_ok = true;
2845 return true;
2848 static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
2851 * Intel CPUs mask the counter and pointers in quite a strange
2852 * manner when ECX is zero due to REP-string optimizations.
2854 #ifdef CONFIG_X86_64
2855 if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
2856 return;
2858 *reg_write(ctxt, VCPU_REGS_RCX) = 0;
2860 switch (ctxt->b) {
2861 case 0xa4: /* movsb */
2862 case 0xa5: /* movsd/w */
2863 *reg_rmw(ctxt, VCPU_REGS_RSI) &= (u32)-1;
2864 /* fall through */
2865 case 0xaa: /* stosb */
2866 case 0xab: /* stosd/w */
2867 *reg_rmw(ctxt, VCPU_REGS_RDI) &= (u32)-1;
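/*
 * Note the deliberate fall-through above: MOVS masks both RSI and RDI
 * down to 32 bits, while STOS only touches RDI, matching the observed
 * Intel behaviour with a 32-bit address size and RCX == 0.
 */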
2872 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
2873 struct tss_segment_16 *tss)
2875 tss->ip = ctxt->_eip;
2876 tss->flag = ctxt->eflags;
2877 tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
2878 tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
2879 tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
2880 tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
2881 tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
2882 tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
2883 tss->si = reg_read(ctxt, VCPU_REGS_RSI);
2884 tss->di = reg_read(ctxt, VCPU_REGS_RDI);
2886 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
2887 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2888 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
2889 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
2890 tss->ldt = get_segment_selector(ctxt, VCPU_SREG_LDTR);
2893 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2894 struct tss_segment_16 *tss)
2899 ctxt->_eip = tss->ip;
2900 ctxt->eflags = tss->flag | 2;
2901 *reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
2902 *reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
2903 *reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
2904 *reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
2905 *reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
2906 *reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
2907 *reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
2908 *reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
2911 * SDM says that segment selectors are loaded before segment
2912 * descriptors.
2914 set_segment_selector(ctxt, tss->ldt, VCPU_SREG_LDTR);
2915 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
2916 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
2917 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
2918 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
2923 * Now load the segment descriptors. If a fault happens at this
2924 * stage, it is handled in the context of the new task.
2926 ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
2927 X86_TRANSFER_TASK_SWITCH, NULL);
2928 if (ret != X86EMUL_CONTINUE)
2929 return ret;
2930 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2931 X86_TRANSFER_TASK_SWITCH, NULL);
2932 if (ret != X86EMUL_CONTINUE)
2933 return ret;
2934 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2935 X86_TRANSFER_TASK_SWITCH, NULL);
2936 if (ret != X86EMUL_CONTINUE)
2937 return ret;
2938 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2939 X86_TRANSFER_TASK_SWITCH, NULL);
2940 if (ret != X86EMUL_CONTINUE)
2941 return ret;
2942 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2943 X86_TRANSFER_TASK_SWITCH, NULL);
2944 if (ret != X86EMUL_CONTINUE)
2945 return ret;
2947 return X86EMUL_CONTINUE;
2950 static int task_switch_16(struct x86_emulate_ctxt *ctxt,
2951 u16 tss_selector, u16 old_tss_sel,
2952 ulong old_tss_base, struct desc_struct *new_desc)
2954 const struct x86_emulate_ops *ops = ctxt->ops;
2955 struct tss_segment_16 tss_seg;
2957 u32 new_tss_base = get_desc_base(new_desc);
2959 ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
2961 if (ret != X86EMUL_CONTINUE)
2962 return ret;
2964 save_state_to_tss16(ctxt, &tss_seg);
2966 ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
2968 if (ret != X86EMUL_CONTINUE)
2969 return ret;
2971 ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
2973 if (ret != X86EMUL_CONTINUE)
2974 return ret;
2976 if (old_tss_sel != 0xffff) {
2977 tss_seg.prev_task_link = old_tss_sel;
2979 ret = ops->write_std(ctxt, new_tss_base,
2980 &tss_seg.prev_task_link,
2981 sizeof tss_seg.prev_task_link,
2983 if (ret != X86EMUL_CONTINUE)
2984 return ret;
2985 }
2987 return load_state_from_tss16(ctxt, &tss_seg);
2990 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
2991 struct tss_segment_32 *tss)
2993 /* CR3 and the LDT selector are intentionally not saved */
2994 tss->eip = ctxt->_eip;
2995 tss->eflags = ctxt->eflags;
2996 tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
2997 tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
2998 tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
2999 tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
3000 tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
3001 tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
3002 tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
3003 tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
3005 tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
3006 tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
3007 tss->ss = get_segment_selector(ctxt, VCPU_SREG_SS);
3008 tss->ds = get_segment_selector(ctxt, VCPU_SREG_DS);
3009 tss->fs = get_segment_selector(ctxt, VCPU_SREG_FS);
3010 tss->gs = get_segment_selector(ctxt, VCPU_SREG_GS);
3013 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
3014 struct tss_segment_32 *tss)
3019 if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
3020 return emulate_gp(ctxt, 0);
3021 ctxt->_eip = tss->eip;
3022 ctxt->eflags = tss->eflags | 2;
3024 /* General purpose registers */
3025 *reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
3026 *reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
3027 *reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
3028 *reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
3029 *reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
3030 *reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
3031 *reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
3032 *reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
3035 * SDM says that segment selectors are loaded before segment
3036 * descriptors. This is important because CPL checks will
3037 * use CS.RPL.
3039 set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
3040 set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
3041 set_segment_selector(ctxt, tss->cs, VCPU_SREG_CS);
3042 set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
3043 set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
3044 set_segment_selector(ctxt, tss->fs, VCPU_SREG_FS);
3045 set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
3048 * If we're switching between Protected Mode and VM86, we need to make
3049 * sure to update the mode before loading the segment descriptors so
3050 * that the selectors are interpreted correctly.
3052 if (ctxt->eflags & X86_EFLAGS_VM) {
3053 ctxt->mode = X86EMUL_MODE_VM86;
3056 ctxt->mode = X86EMUL_MODE_PROT32;
3061 * Now load the segment descriptors. If a fault happens at this
3062 * stage, it is handled in the context of the new task.
3064 ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
3065 cpl, X86_TRANSFER_TASK_SWITCH, NULL);
3066 if (ret != X86EMUL_CONTINUE)
3067 return ret;
3068 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
3069 X86_TRANSFER_TASK_SWITCH, NULL);
3070 if (ret != X86EMUL_CONTINUE)
3071 return ret;
3072 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
3073 X86_TRANSFER_TASK_SWITCH, NULL);
3074 if (ret != X86EMUL_CONTINUE)
3075 return ret;
3076 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
3077 X86_TRANSFER_TASK_SWITCH, NULL);
3078 if (ret != X86EMUL_CONTINUE)
3079 return ret;
3080 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
3081 X86_TRANSFER_TASK_SWITCH, NULL);
3082 if (ret != X86EMUL_CONTINUE)
3083 return ret;
3084 ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
3085 X86_TRANSFER_TASK_SWITCH, NULL);
3086 if (ret != X86EMUL_CONTINUE)
3087 return ret;
3088 ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
3089 X86_TRANSFER_TASK_SWITCH, NULL);
3091 return ret;
3094 static int task_switch_32(struct x86_emulate_ctxt *ctxt,
3095 u16 tss_selector, u16 old_tss_sel,
3096 ulong old_tss_base, struct desc_struct *new_desc)
3098 const struct x86_emulate_ops *ops = ctxt->ops;
3099 struct tss_segment_32 tss_seg;
3101 u32 new_tss_base = get_desc_base(new_desc);
3102 u32 eip_offset = offsetof(struct tss_segment_32, eip);
3103 u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
3105 ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
3107 if (ret != X86EMUL_CONTINUE)
3108 return ret;
3110 save_state_to_tss32(ctxt, &tss_seg);
3112 /* Only GP registers and segment selectors are saved */
3113 ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
3114 ldt_sel_offset - eip_offset, &ctxt->exception);
3115 if (ret != X86EMUL_CONTINUE)
3116 return ret;
3118 ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
3120 if (ret != X86EMUL_CONTINUE)
3121 return ret;
3123 if (old_tss_sel != 0xffff) {
3124 tss_seg.prev_task_link = old_tss_sel;
3126 ret = ops->write_std(ctxt, new_tss_base,
3127 &tss_seg.prev_task_link,
3128 sizeof tss_seg.prev_task_link,
3130 if (ret != X86EMUL_CONTINUE)
3131 return ret;
3132 }
3134 return load_state_from_tss32(ctxt, &tss_seg);
3137 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
3138 u16 tss_selector, int idt_index, int reason,
3139 bool has_error_code, u32 error_code)
3141 const struct x86_emulate_ops *ops = ctxt->ops;
3142 struct desc_struct curr_tss_desc, next_tss_desc;
3144 u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
3145 ulong old_tss_base =
3146 ops->get_cached_segment_base(ctxt, VCPU_SREG_TR);
3148 ulong desc_addr, dr7;
3150 /* FIXME: old_tss_base == ~0 ? */
3152 ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc, &desc_addr);
3153 if (ret != X86EMUL_CONTINUE)
3154 return ret;
3155 ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc, &desc_addr);
3156 if (ret != X86EMUL_CONTINUE)
3157 return ret;
3159 /* FIXME: check that next_tss_desc is tss */
3162 * Check privileges. The three cases are task switch caused by...
3164 * 1. jmp/call/int to task gate: Check against DPL of the task gate
3165 * 2. Exception/IRQ/iret: No check is performed
3166 * 3. jmp/call to TSS/task-gate: No check is performed since the
3167 * hardware checks it before exiting.
3169 if (reason == TASK_SWITCH_GATE) {
3170 if (idt_index != -1) {
3171 /* Software interrupts */
3172 struct desc_struct task_gate_desc;
3175 ret = read_interrupt_descriptor(ctxt, idt_index,
3177 if (ret != X86EMUL_CONTINUE)
3178 return ret;
3180 dpl = task_gate_desc.dpl;
3181 if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
3182 return emulate_gp(ctxt, (idt_index << 3) | 0x2);
3186 desc_limit = desc_limit_scaled(&next_tss_desc);
3187 if (!next_tss_desc.p ||
3188 ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
3189 desc_limit < 0x2b)) {
3190 return emulate_ts(ctxt, tss_selector & 0xfffc);
3193 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
3194 curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
3195 write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc);
3198 if (reason == TASK_SWITCH_IRET)
3199 ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
3201 /* Set the back link to the previous task only if the NT bit is
3202 set in EFLAGS; note that old_tss_sel is not used after this point. */
3203 if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
3204 old_tss_sel = 0xffff;
3206 if (next_tss_desc.type & 8)
3207 ret = task_switch_32(ctxt, tss_selector, old_tss_sel,
3208 old_tss_base, &next_tss_desc);
3210 ret = task_switch_16(ctxt, tss_selector, old_tss_sel,
3211 old_tss_base, &next_tss_desc);
3212 if (ret != X86EMUL_CONTINUE)
3215 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
3216 ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
3218 if (reason != TASK_SWITCH_IRET) {
3219 next_tss_desc.type |= (1 << 1); /* set busy flag */
3220 write_segment_descriptor(ctxt, tss_selector, &next_tss_desc);
3223 ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS);
3224 ops->set_segment(ctxt, tss_selector, &next_tss_desc, 0, VCPU_SREG_TR);
3226 if (has_error_code) {
3227 ctxt->op_bytes = ctxt->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
3228 ctxt->lock_prefix = 0;
3229 ctxt->src.val = (unsigned long) error_code;
3230 ret = em_push(ctxt);
3233 ops->get_dr(ctxt, 7, &dr7);
3234 ops->set_dr(ctxt, 7, dr7 & ~(DR_LOCAL_ENABLE_MASK | DR_LOCAL_SLOWDOWN));
3236 return ret;
3239 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
3240 u16 tss_selector, int idt_index, int reason,
3241 bool has_error_code, u32 error_code)
3245 invalidate_registers(ctxt);
3246 ctxt->_eip = ctxt->eip;
3247 ctxt->dst.type = OP_NONE;
3249 rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
3250 has_error_code, error_code);
3252 if (rc == X86EMUL_CONTINUE) {
3253 ctxt->eip = ctxt->_eip;
3254 writeback_registers(ctxt);
3257 return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
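/*
 * Contract of the wrapper above: guest registers are only committed
 * with writeback_registers() when the whole switch succeeds, and an
 * X86EMUL_UNHANDLEABLE result is reported to the caller as
 * EMULATION_FAILED.
 */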
3260 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
3263 int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
3265 register_address_increment(ctxt, reg, df * op->bytes);
3266 op->addr.mem.ea = register_address(ctxt, reg);
3269 static int em_das(struct x86_emulate_ctxt *ctxt)
3271 u8 al, old_al;
3272 bool af, cf, old_cf;
3274 cf = ctxt->eflags & X86_EFLAGS_CF;
3275 al = ctxt->dst.val;
3277 old_al = al;
3278 old_cf = cf;
3279 cf = false;
3280 af = ctxt->eflags & X86_EFLAGS_AF;
3281 if ((al & 0x0f) > 9 || af) {
3282 al -= 6;
3283 cf = old_cf | (al >= 250);
3284 af = true;
3285 } else {
3286 af = false;
3287 }
3288 if (old_al > 0x99 || old_cf) {
3289 al -= 0x60;
3290 cf = true;
3291 }
3293 ctxt->dst.val = al;
3294 /* Set PF, ZF, SF */
3295 ctxt->src.type = OP_IMM;
3296 ctxt->src.val = 0;
3297 ctxt->src.bytes = 1;
3298 fastop(ctxt, em_or);
3299 ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
3300 if (cf)
3301 ctxt->eflags |= X86_EFLAGS_CF;
3302 if (af)
3303 ctxt->eflags |= X86_EFLAGS_AF;
3304 return X86EMUL_CONTINUE;
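/*
 * Worked example for the DAS adjustment above: after 0x83 - 0x38 the
 * accumulator holds AL = 0x4b.  The low nibble 0xb is greater than 9,
 * so 6 is subtracted (AL = 0x45, AF set); 0x4b does not exceed 0x99
 * and CF was clear, so no 0x60 adjustment follows - giving the
 * packed-BCD result 45, as expected for 83 - 38.
 */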
3307 static int em_aam(struct x86_emulate_ctxt *ctxt)
3309 u8 al, ah;
3311 if (ctxt->src.val == 0)
3312 return emulate_de(ctxt);
3314 al = ctxt->dst.val & 0xff;
3315 ah = al / ctxt->src.val;
3316 al %= ctxt->src.val;
3318 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al | (ah << 8);
3320 /* Set PF, ZF, SF */
3321 ctxt->src.type = OP_IMM;
3322 ctxt->src.val = 0;
3323 ctxt->src.bytes = 1;
3324 fastop(ctxt, em_or);
3326 return X86EMUL_CONTINUE;
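/*
 * Example: with AL = 42 (0x2a), "aam" with its default base of 10
 * stores the quotient in AH and the remainder in AL, so AX becomes
 * 0x0402.  A zero immediate would divide by zero, hence the #DE
 * check above.
 */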
3329 static int em_aad(struct x86_emulate_ctxt *ctxt)
3331 u8 al = ctxt->dst.val & 0xff;
3332 u8 ah = (ctxt->dst.val >> 8) & 0xff;
3334 al = (al + (ah * ctxt->src.val)) & 0xff;
3336 ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al;
3338 /* Set PF, ZF, SF */
3339 ctxt->src.type = OP_IMM;
3340 ctxt->src.val = 0;
3341 ctxt->src.bytes = 1;
3342 fastop(ctxt, em_or);
3344 return X86EMUL_CONTINUE;
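/*
 * Example: AX = 0x0402 with the default base of 10 becomes
 * AL = 4 * 10 + 2 = 42 (0x2a) with AH cleared, exactly undoing the
 * AAM example above.
 */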
3347 static int em_call(struct x86_emulate_ctxt *ctxt)
3350 long rel = ctxt->src.val;
3352 ctxt->src.val = (unsigned long)ctxt->_eip;
3353 rc = jmp_rel(ctxt, rel);
3354 if (rc != X86EMUL_CONTINUE)
3355 return rc;
3356 return em_push(ctxt);
3359 static int em_call_far(struct x86_emulate_ctxt *ctxt)
3364 struct desc_struct old_desc, new_desc;
3365 const struct x86_emulate_ops *ops = ctxt->ops;
3366 int cpl = ctxt->ops->cpl(ctxt);
3367 enum x86emul_mode prev_mode = ctxt->mode;
3369 old_eip = ctxt->_eip;
3370 ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
3372 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
3373 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
3374 X86_TRANSFER_CALL_JMP, &new_desc);
3375 if (rc != X86EMUL_CONTINUE)
3376 return rc;
3378 rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
3379 if (rc != X86EMUL_CONTINUE)
3380 goto fail;
3382 ctxt->src.val = old_cs;
3383 rc = em_push(ctxt);
3384 if (rc != X86EMUL_CONTINUE)
3385 goto fail;
3387 ctxt->src.val = old_eip;
3388 rc = em_push(ctxt);
3389 /* If we failed, we tainted the memory, but the very least we should
3390 restore is the old eip. */
3391 if (rc != X86EMUL_CONTINUE) {
3392 pr_warn_once("faulting far call emulation tainted memory\n");
3393 goto fail;
3394 }
3395 return rc;
3396 fail:
3397 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3398 ctxt->mode = prev_mode;
3403 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
3408 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
3409 if (rc != X86EMUL_CONTINUE)
3410 return rc;
3411 rc = assign_eip_near(ctxt, eip);
3412 if (rc != X86EMUL_CONTINUE)
3413 return rc;
3414 rsp_increment(ctxt, ctxt->src.val);
3415 return X86EMUL_CONTINUE;
3418 static int em_xchg(struct x86_emulate_ctxt *ctxt)
3420 /* Write back the register source. */
3421 ctxt->src.val = ctxt->dst.val;
3422 write_register_operand(&ctxt->src);
3424 /* Write back the memory destination with implicit LOCK prefix. */
3425 ctxt->dst.val = ctxt->src.orig_val;
3426 ctxt->lock_prefix = 1;
3427 return X86EMUL_CONTINUE;
3430 static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
3432 ctxt->dst.val = ctxt->src2.val;
3433 return fastop(ctxt, em_imul);
3436 static int em_cwd(struct x86_emulate_ctxt *ctxt)
3438 ctxt->dst.type = OP_REG;
3439 ctxt->dst.bytes = ctxt->src.bytes;
3440 ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
3441 ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
3443 return X86EMUL_CONTINUE;
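/*
 * The expression above broadcasts the accumulator's sign bit: when
 * the top bit of src.val is set, (src.val >> (bytes * 8 - 1)) is 1
 * and ~(1 - 1) is all ones, so RDX is filled with the sign; otherwise
 * ~(0 - 1) is 0.  This implements CWD/CDQ/CQO for the current
 * operand size.
 */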
3446 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
3450 ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
3451 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
3452 *reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
3453 return X86EMUL_CONTINUE;
3456 static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
3460 if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
3461 return emulate_gp(ctxt, 0);
3462 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
3463 *reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
3464 return X86EMUL_CONTINUE;
3467 static int em_mov(struct x86_emulate_ctxt *ctxt)
3469 memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
3470 return X86EMUL_CONTINUE;
3473 #define FFL(x) bit(X86_FEATURE_##x)
3475 static int em_movbe(struct x86_emulate_ctxt *ctxt)
3477 u32 ebx, ecx, edx, eax = 1;
3481 * Check MOVBE is set in the guest-visible CPUID leaf.
3483 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
3484 if (!(ecx & FFL(MOVBE)))
3485 return emulate_ud(ctxt);
3487 switch (ctxt->op_bytes) {
3488 case 2:
3489 /*
3490 * From MOVBE definition: "...When the operand size is 16 bits,
3491 * the upper word of the destination register remains unchanged
3494 * Both casting ->valptr and ->val to u16 breaks strict aliasing
3495 * rules so we have to do the operation almost per hand.
3497 tmp = (u16)ctxt->src.val;
3498 ctxt->dst.val &= ~0xffffUL;
3499 ctxt->dst.val |= (unsigned long)swab16(tmp);
3500 break;
3501 case 4:
3502 ctxt->dst.val = swab32((u32)ctxt->src.val);
3503 break;
3504 case 8:
3505 ctxt->dst.val = swab64(ctxt->src.val);
3506 break;
3507 default:
3508 BUG();
3509 }
3510 return X86EMUL_CONTINUE;
3513 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
3515 if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
3516 return emulate_gp(ctxt, 0);
3518 /* Disable writeback. */
3519 ctxt->dst.type = OP_NONE;
3520 return X86EMUL_CONTINUE;
3523 static int em_dr_write(struct x86_emulate_ctxt *ctxt)
3527 if (ctxt->mode == X86EMUL_MODE_PROT64)
3528 val = ctxt->src.val & ~0ULL;
3530 val = ctxt->src.val & ~0U;
3532 /* #UD condition is already handled. */
3533 if (ctxt->ops->set_dr(ctxt, ctxt->modrm_reg, val) < 0)
3534 return emulate_gp(ctxt, 0);
3536 /* Disable writeback. */
3537 ctxt->dst.type = OP_NONE;
3538 return X86EMUL_CONTINUE;
3541 static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
3545 msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
3546 | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
3547 if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data))
3548 return emulate_gp(ctxt, 0);
3550 return X86EMUL_CONTINUE;
3553 static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
3557 if (ctxt->ops->get_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &msr_data))
3558 return emulate_gp(ctxt, 0);
3560 *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
3561 *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
3562 return X86EMUL_CONTINUE;
3565 static int em_mov_rm_sreg(struct x86_emulate_ctxt *ctxt)
3567 if (ctxt->modrm_reg > VCPU_SREG_GS)
3568 return emulate_ud(ctxt);
3570 ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg);
3571 if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
3572 ctxt->dst.bytes = 2;
3573 return X86EMUL_CONTINUE;
3576 static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
3578 u16 sel = ctxt->src.val;
3580 if (ctxt->modrm_reg == VCPU_SREG_CS || ctxt->modrm_reg > VCPU_SREG_GS)
3581 return emulate_ud(ctxt);
3583 if (ctxt->modrm_reg == VCPU_SREG_SS)
3584 ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3586 /* Disable writeback. */
3587 ctxt->dst.type = OP_NONE;
3588 return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
3591 static int em_lldt(struct x86_emulate_ctxt *ctxt)
3593 u16 sel = ctxt->src.val;
3595 /* Disable writeback. */
3596 ctxt->dst.type = OP_NONE;
3597 return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
3600 static int em_ltr(struct x86_emulate_ctxt *ctxt)
3602 u16 sel = ctxt->src.val;
3604 /* Disable writeback. */
3605 ctxt->dst.type = OP_NONE;
3606 return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
3609 static int em_invlpg(struct x86_emulate_ctxt *ctxt)
3614 rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
3615 if (rc == X86EMUL_CONTINUE)
3616 ctxt->ops->invlpg(ctxt, linear);
3617 /* Disable writeback. */
3618 ctxt->dst.type = OP_NONE;
3619 return X86EMUL_CONTINUE;
3622 static int em_clts(struct x86_emulate_ctxt *ctxt)
3626 cr0 = ctxt->ops->get_cr(ctxt, 0);
3627 cr0 &= ~X86_CR0_TS;
3628 ctxt->ops->set_cr(ctxt, 0, cr0);
3629 return X86EMUL_CONTINUE;
3632 static int em_hypercall(struct x86_emulate_ctxt *ctxt)
3634 int rc = ctxt->ops->fix_hypercall(ctxt);
3636 if (rc != X86EMUL_CONTINUE)
3637 return rc;
3639 /* Let the processor re-execute the fixed hypercall */
3640 ctxt->_eip = ctxt->eip;
3641 /* Disable writeback. */
3642 ctxt->dst.type = OP_NONE;
3643 return X86EMUL_CONTINUE;
3646 static int emulate_store_desc_ptr(struct x86_emulate_ctxt *ctxt,
3647 void (*get)(struct x86_emulate_ctxt *ctxt,
3648 struct desc_ptr *ptr))
3650 struct desc_ptr desc_ptr;
3652 if (ctxt->mode == X86EMUL_MODE_PROT64)
3653 ctxt->op_bytes = 8;
3654 get(ctxt, &desc_ptr);
3655 if (ctxt->op_bytes == 2) {
3656 ctxt->op_bytes = 4;
3657 desc_ptr.address &= 0x00ffffff;
3658 }
3659 /* Disable writeback. */
3660 ctxt->dst.type = OP_NONE;
3661 return segmented_write(ctxt, ctxt->dst.addr.mem,
3662 &desc_ptr, 2 + ctxt->op_bytes);
3665 static int em_sgdt(struct x86_emulate_ctxt *ctxt)
3667 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_gdt);
3670 static int em_sidt(struct x86_emulate_ctxt *ctxt)
3672 return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
3675 static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
3677 struct desc_ptr desc_ptr;
3680 if (ctxt->mode == X86EMUL_MODE_PROT64)
3681 ctxt->op_bytes = 8;
3682 rc = read_descriptor(ctxt, ctxt->src.addr.mem,
3683 &desc_ptr.size, &desc_ptr.address,
3685 if (rc != X86EMUL_CONTINUE)
3686 return rc;
3687 if (ctxt->mode == X86EMUL_MODE_PROT64 &&
3688 is_noncanonical_address(desc_ptr.address))
3689 return emulate_gp(ctxt, 0);
3690 if (lgdt)
3691 ctxt->ops->set_gdt(ctxt, &desc_ptr);
3692 else
3693 ctxt->ops->set_idt(ctxt, &desc_ptr);
3694 /* Disable writeback. */
3695 ctxt->dst.type = OP_NONE;
3696 return X86EMUL_CONTINUE;
3699 static int em_lgdt(struct x86_emulate_ctxt *ctxt)
3701 return em_lgdt_lidt(ctxt, true);
3704 static int em_lidt(struct x86_emulate_ctxt *ctxt)
3706 return em_lgdt_lidt(ctxt, false);
3709 static int em_smsw(struct x86_emulate_ctxt *ctxt)
3711 if (ctxt->dst.type == OP_MEM)
3712 ctxt->dst.bytes = 2;
3713 ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
3714 return X86EMUL_CONTINUE;
3717 static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3719 ctxt->ops->set_cr(ctxt, 0, (ctxt->ops->get_cr(ctxt, 0) & ~0x0eul)
3720 | (ctxt->src.val & 0x0f));
3721 ctxt->dst.type = OP_NONE;
3722 return X86EMUL_CONTINUE;
3725 static int em_loop(struct x86_emulate_ctxt *ctxt)
3727 int rc = X86EMUL_CONTINUE;
3729 register_address_increment(ctxt, VCPU_REGS_RCX, -1);
3730 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3731 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3732 rc = jmp_rel(ctxt, ctxt->src.val);
3734 return rc;
3737 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3739 int rc = X86EMUL_CONTINUE;
3741 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3742 rc = jmp_rel(ctxt, ctxt->src.val);
3744 return rc;
3747 static int em_in(struct x86_emulate_ctxt *ctxt)
3749 if (!pio_in_emulated(ctxt, ctxt->dst.bytes, ctxt->src.val,
3750 &ctxt->dst.val))
3751 return X86EMUL_IO_NEEDED;
3753 return X86EMUL_CONTINUE;
3756 static int em_out(struct x86_emulate_ctxt *ctxt)
3758 ctxt->ops->pio_out_emulated(ctxt, ctxt->src.bytes, ctxt->dst.val,
3759 &ctxt->src.val, 1);
3760 /* Disable writeback. */
3761 ctxt->dst.type = OP_NONE;
3762 return X86EMUL_CONTINUE;
3765 static int em_cli(struct x86_emulate_ctxt *ctxt)
3767 if (emulator_bad_iopl(ctxt))
3768 return emulate_gp(ctxt, 0);
3770 ctxt->eflags &= ~X86_EFLAGS_IF;
3771 return X86EMUL_CONTINUE;
3774 static int em_sti(struct x86_emulate_ctxt *ctxt)
3776 if (emulator_bad_iopl(ctxt))
3777 return emulate_gp(ctxt, 0);
3779 ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3780 ctxt->eflags |= X86_EFLAGS_IF;
3781 return X86EMUL_CONTINUE;
3784 static int em_cpuid(struct x86_emulate_ctxt *ctxt)
3786 u32 eax, ebx, ecx, edx;
3788 eax = reg_read(ctxt, VCPU_REGS_RAX);
3789 ecx = reg_read(ctxt, VCPU_REGS_RCX);
3790 ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
3791 *reg_write(ctxt, VCPU_REGS_RAX) = eax;
3792 *reg_write(ctxt, VCPU_REGS_RBX) = ebx;
3793 *reg_write(ctxt, VCPU_REGS_RCX) = ecx;
3794 *reg_write(ctxt, VCPU_REGS_RDX) = edx;
3795 return X86EMUL_CONTINUE;
3798 static int em_sahf(struct x86_emulate_ctxt *ctxt)
3802 flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
3803 X86_EFLAGS_SF;
3804 flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
3806 ctxt->eflags &= ~0xffUL;
3807 ctxt->eflags |= flags | X86_EFLAGS_FIXED;
3808 return X86EMUL_CONTINUE;
3811 static int em_lahf(struct x86_emulate_ctxt *ctxt)
3813 *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
3814 *reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
3815 return X86EMUL_CONTINUE;
3818 static int em_bswap(struct x86_emulate_ctxt *ctxt)
3820 switch (ctxt->op_bytes) {
3821 #ifdef CONFIG_X86_64
3822 case 8:
3823 asm("bswap %0" : "+r"(ctxt->dst.val));
3824 break;
3825 #endif
3826 default:
3827 asm("bswap %0" : "+r"(*(u32 *)&ctxt->dst.val));
3828 break;
3829 }
3830 return X86EMUL_CONTINUE;
3833 static int em_clflush(struct x86_emulate_ctxt *ctxt)
3835 /* emulating clflush regardless of cpuid */
3836 return X86EMUL_CONTINUE;
3839 static int em_movsxd(struct x86_emulate_ctxt *ctxt)
3841 ctxt->dst.val = (s32) ctxt->src.val;
3842 return X86EMUL_CONTINUE;
3845 static bool valid_cr(int nr)
3846 {
3847 switch (nr) {
3848 case 0:
3849 case 2 ... 4:
3850 case 8:
3851 return true;
3852 default:
3853 return false;
3854 }
3855 }
3857 static int check_cr_read(struct x86_emulate_ctxt *ctxt)
3859 if (!valid_cr(ctxt->modrm_reg))
3860 return emulate_ud(ctxt);
3862 return X86EMUL_CONTINUE;
3865 static int check_cr_write(struct x86_emulate_ctxt *ctxt)
3867 u64 new_val = ctxt->src.val64;
3868 int cr = ctxt->modrm_reg;
3871 static u64 cr_reserved_bits[] = {
3872 0xffffffff00000000ULL,
3873 0, 0, 0, /* CR3 checked later */
3874 CR4_RESERVED_BITS,
3875 0, 0, 0,
3876 CR8_RESERVED_BITS,
3877 };
3879 if (!valid_cr(cr))
3880 return emulate_ud(ctxt);
3882 if (new_val & cr_reserved_bits[cr])
3883 return emulate_gp(ctxt, 0);
3885 switch (cr) {
3886 case 0: {
3887 u64 cr4;
3888 if (((new_val & X86_CR0_PG) && !(new_val & X86_CR0_PE)) ||
3889 ((new_val & X86_CR0_NW) && !(new_val & X86_CR0_CD)))
3890 return emulate_gp(ctxt, 0);
3892 cr4 = ctxt->ops->get_cr(ctxt, 4);
3893 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3895 if ((new_val & X86_CR0_PG) && (efer & EFER_LME) &&
3896 !(cr4 & X86_CR4_PAE))
3897 return emulate_gp(ctxt, 0);
3899 break;
3900 }
3901 case 3: {
3902 u64 rsvd = 0;
3904 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3905 if (efer & EFER_LMA)
3906 rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;
3908 if (new_val & rsvd)
3909 return emulate_gp(ctxt, 0);
3911 break;
3912 }
3913 case 4: {
3914 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3916 if ((efer & EFER_LMA) && !(new_val & X86_CR4_PAE))
3917 return emulate_gp(ctxt, 0);
3919 break;
3920 }
3921 }
3923 return X86EMUL_CONTINUE;
3926 static int check_dr7_gd(struct x86_emulate_ctxt *ctxt)
3930 ctxt->ops->get_dr(ctxt, 7, &dr7);
3932 /* Check whether DR7.GD (bit 13, general detect) is set */
3933 return dr7 & (1 << 13);
3936 static int check_dr_read(struct x86_emulate_ctxt *ctxt)
3938 int dr = ctxt->modrm_reg;
3941 if (dr > 7)
3942 return emulate_ud(ctxt);
3944 cr4 = ctxt->ops->get_cr(ctxt, 4);
3945 if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
3946 return emulate_ud(ctxt);
3948 if (check_dr7_gd(ctxt)) {
3951 ctxt->ops->get_dr(ctxt, 6, &dr6);
3952 dr6 &= ~15;
3953 dr6 |= DR6_BD | DR6_RTM;
3954 ctxt->ops->set_dr(ctxt, 6, dr6);
3955 return emulate_db(ctxt);
3958 return X86EMUL_CONTINUE;
3961 static int check_dr_write(struct x86_emulate_ctxt *ctxt)
3963 u64 new_val = ctxt->src.val64;
3964 int dr = ctxt->modrm_reg;
3966 if ((dr == 6 || dr == 7) && (new_val & 0xffffffff00000000ULL))
3967 return emulate_gp(ctxt, 0);
3969 return check_dr_read(ctxt);
3972 static int check_svme(struct x86_emulate_ctxt *ctxt)
3976 ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
3978 if (!(efer & EFER_SVME))
3979 return emulate_ud(ctxt);
3981 return X86EMUL_CONTINUE;
3984 static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
3986 u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
3988 /* Valid physical address? */
3989 if (rax & 0xffff000000000000ULL)
3990 return emulate_gp(ctxt, 0);
3992 return check_svme(ctxt);
3995 static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
3997 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
3999 if (cr4 & X86_CR4_TSD && ctxt->ops->cpl(ctxt))
4000 return emulate_ud(ctxt);
4002 return X86EMUL_CONTINUE;
4005 static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
4007 u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
4008 u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
4010 if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
4011 ctxt->ops->check_pmc(ctxt, rcx))
4012 return emulate_gp(ctxt, 0);
4014 return X86EMUL_CONTINUE;
4017 static int check_perm_in(struct x86_emulate_ctxt *ctxt)
4019 ctxt->dst.bytes = min(ctxt->dst.bytes, 4u);
4020 if (!emulator_io_permited(ctxt, ctxt->src.val, ctxt->dst.bytes))
4021 return emulate_gp(ctxt, 0);
4023 return X86EMUL_CONTINUE;
4026 static int check_perm_out(struct x86_emulate_ctxt *ctxt)
4028 ctxt->src.bytes = min(ctxt->src.bytes, 4u);
4029 if (!emulator_io_permited(ctxt, ctxt->dst.val, ctxt->src.bytes))
4030 return emulate_gp(ctxt, 0);
4032 return X86EMUL_CONTINUE;
4035 #define D(_y) { .flags = (_y) }
4036 #define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
4037 #define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
4038 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4039 #define N D(NotImpl)
4040 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
4041 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
4042 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
4043 #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
4044 #define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
4045 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
4046 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
4047 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
4048 #define II(_f, _e, _i) \
4049 { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
4050 #define IIP(_f, _e, _i, _p) \
4051 { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
4052 .intercept = x86_intercept_##_i, .check_perm = (_p) }
4053 #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
4055 #define D2bv(_f) D((_f) | ByteOp), D(_f)
4056 #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p)
4057 #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e)
4058 #define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e)
4059 #define I2bvIP(_f, _e, _i, _p) \
4060 IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p)
4062 #define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \
4063 F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
4064 F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
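/*
 * F6ALU() expands to the six classic encodings of a two-operand ALU
 * instruction: r/m,r then r,r/m then AL/eAX,imm - each in a byte and
 * a word/dword variant via F2bv().  Lock is dropped from the forms
 * whose destination (register or accumulator) cannot take a LOCK
 * prefix.
 */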
4066 static const struct opcode group7_rm0[] = {
4068 I(SrcNone | Priv | EmulateOnUD, em_hypercall),
4072 static const struct opcode group7_rm1[] = {
4073 DI(SrcNone | Priv, monitor),
4074 DI(SrcNone | Priv, mwait),
4078 static const struct opcode group7_rm3[] = {
4079 DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
4080 II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall),
4081 DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
4082 DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
4083 DIP(SrcNone | Prot | Priv, stgi, check_svme),
4084 DIP(SrcNone | Prot | Priv, clgi, check_svme),
4085 DIP(SrcNone | Prot | Priv, skinit, check_svme),
4086 DIP(SrcNone | Prot | Priv, invlpga, check_svme),
4089 static const struct opcode group7_rm7[] = {
4091 DIP(SrcNone, rdtscp, check_rdtsc),
4095 static const struct opcode group1[] = {
4097 F(Lock | PageTable, em_or),
4100 F(Lock | PageTable, em_and),
4106 static const struct opcode group1A[] = {
4107 I(DstMem | SrcNone | Mov | Stack | IncSP, em_pop), N, N, N, N, N, N, N,
4110 static const struct opcode group2[] = {
4111 F(DstMem | ModRM, em_rol),
4112 F(DstMem | ModRM, em_ror),
4113 F(DstMem | ModRM, em_rcl),
4114 F(DstMem | ModRM, em_rcr),
4115 F(DstMem | ModRM, em_shl),
4116 F(DstMem | ModRM, em_shr),
4117 F(DstMem | ModRM, em_shl),
4118 F(DstMem | ModRM, em_sar),
4121 static const struct opcode group3[] = {
4122 F(DstMem | SrcImm | NoWrite, em_test),
4123 F(DstMem | SrcImm | NoWrite, em_test),
4124 F(DstMem | SrcNone | Lock, em_not),
4125 F(DstMem | SrcNone | Lock, em_neg),
4126 F(DstXacc | Src2Mem, em_mul_ex),
4127 F(DstXacc | Src2Mem, em_imul_ex),
4128 F(DstXacc | Src2Mem, em_div_ex),
4129 F(DstXacc | Src2Mem, em_idiv_ex),
4132 static const struct opcode group4[] = {
4133 F(ByteOp | DstMem | SrcNone | Lock, em_inc),
4134 F(ByteOp | DstMem | SrcNone | Lock, em_dec),
4138 static const struct opcode group5[] = {
4139 F(DstMem | SrcNone | Lock, em_inc),
4140 F(DstMem | SrcNone | Lock, em_dec),
4141 I(SrcMem | NearBranch, em_call_near_abs),
4142 I(SrcMemFAddr | ImplicitOps, em_call_far),
4143 I(SrcMem | NearBranch, em_jmp_abs),
4144 I(SrcMemFAddr | ImplicitOps, em_jmp_far),
4145 I(SrcMem | Stack, em_push), D(Undefined),
4148 static const struct opcode group6[] = {
4149 DI(Prot | DstMem, sldt),
4150 DI(Prot | DstMem, str),
4151 II(Prot | Priv | SrcMem16, em_lldt, lldt),
4152 II(Prot | Priv | SrcMem16, em_ltr, ltr),
4156 static const struct group_dual group7 = { {
4157 II(Mov | DstMem, em_sgdt, sgdt),
4158 II(Mov | DstMem, em_sidt, sidt),
4159 II(SrcMem | Priv, em_lgdt, lgdt),
4160 II(SrcMem | Priv, em_lidt, lidt),
4161 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4162 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4163 II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg),
4167 N, EXT(0, group7_rm3),
4168 II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
4169 II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
4173 static const struct opcode group8[] = {
4175 F(DstMem | SrcImmByte | NoWrite, em_bt),
4176 F(DstMem | SrcImmByte | Lock | PageTable, em_bts),
4177 F(DstMem | SrcImmByte | Lock, em_btr),
4178 F(DstMem | SrcImmByte | Lock | PageTable, em_btc),
4181 static const struct group_dual group9 = { {
4182 N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
4184 N, N, N, N, N, N, N, N,
4187 static const struct opcode group11[] = {
4188 I(DstMem | SrcImm | Mov | PageTable, em_mov),
4192 static const struct gprefix pfx_0f_ae_7 = {
4193 I(SrcMem | ByteOp, em_clflush), N, N, N,
4196 static const struct group_dual group15 = { {
4197 N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7),
4199 N, N, N, N, N, N, N, N,
4202 static const struct gprefix pfx_0f_6f_0f_7f = {
4203 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
4206 static const struct instr_dual instr_dual_0f_2b = {
4210 static const struct gprefix pfx_0f_2b = {
4211 ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
4214 static const struct gprefix pfx_0f_28_0f_29 = {
4215 I(Aligned, em_mov), I(Aligned, em_mov), N, N,
4218 static const struct gprefix pfx_0f_e7 = {
4219 N, I(Sse, em_mov), N, N,
4222 static const struct escape escape_d9 = { {
4223 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
4226 N, N, N, N, N, N, N, N,
4228 N, N, N, N, N, N, N, N,
4230 N, N, N, N, N, N, N, N,
4232 N, N, N, N, N, N, N, N,
4234 N, N, N, N, N, N, N, N,
4236 N, N, N, N, N, N, N, N,
4238 N, N, N, N, N, N, N, N,
4240 N, N, N, N, N, N, N, N,
4243 static const struct escape escape_db = { {
4244 N, N, N, N, N, N, N, N,
4247 N, N, N, N, N, N, N, N,
4249 N, N, N, N, N, N, N, N,
4251 N, N, N, N, N, N, N, N,
4253 N, N, N, N, N, N, N, N,
4255 N, N, N, I(ImplicitOps, em_fninit), N, N, N, N,
4257 N, N, N, N, N, N, N, N,
4259 N, N, N, N, N, N, N, N,
4261 N, N, N, N, N, N, N, N,
4264 static const struct escape escape_dd = { {
4265 N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
4268 N, N, N, N, N, N, N, N,
4270 N, N, N, N, N, N, N, N,
4272 N, N, N, N, N, N, N, N,
4274 N, N, N, N, N, N, N, N,
4276 N, N, N, N, N, N, N, N,
4278 N, N, N, N, N, N, N, N,
4280 N, N, N, N, N, N, N, N,
4282 N, N, N, N, N, N, N, N,
4285 static const struct instr_dual instr_dual_0f_c3 = {
4286 I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
4289 static const struct mode_dual mode_dual_63 = {
4290 N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
4293 static const struct opcode opcode_table[256] = {
4295 F6ALU(Lock, em_add),
4296 I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
4297 I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg),
4299 F6ALU(Lock | PageTable, em_or),
4300 I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg),
4303 F6ALU(Lock, em_adc),
4304 I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg),
4305 I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg),
4307 F6ALU(Lock, em_sbb),
4308 I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg),
4309 I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg),
4311 F6ALU(Lock | PageTable, em_and), N, N,
4313 F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das),
4315 F6ALU(Lock, em_xor), N, N,
4317 F6ALU(NoWrite, em_cmp), N, N,
4319 X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)),
4321 X8(I(SrcReg | Stack, em_push)),
4323 X8(I(DstReg | Stack, em_pop)),
4325 I(ImplicitOps | Stack | No64, em_pusha),
4326 I(ImplicitOps | Stack | No64, em_popa),
4327 N, MD(ModRM, &mode_dual_63),
4330 I(SrcImm | Mov | Stack, em_push),
4331 I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
4332 I(SrcImmByte | Mov | Stack, em_push),
4333 I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
4334 I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
4335 I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
4337 X16(D(SrcImmByte | NearBranch)),
4339 G(ByteOp | DstMem | SrcImm, group1),
4340 G(DstMem | SrcImm, group1),
4341 G(ByteOp | DstMem | SrcImm | No64, group1),
4342 G(DstMem | SrcImmByte, group1),
4343 F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test),
4344 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg),
4346 I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov),
4347 I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
4348 I(DstMem | SrcNone | ModRM | Mov | PageTable, em_mov_rm_sreg),
4349 D(ModRM | SrcMem | NoAccess | DstReg),
4350 I(ImplicitOps | SrcMem16 | ModRM, em_mov_sreg_rm),
4353 DI(SrcAcc | DstReg, pause), X7(D(SrcAcc | DstReg)),
4355 D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
4356 I(SrcImmFAddr | No64, em_call_far), N,
4357 II(ImplicitOps | Stack, em_pushf, pushf),
4358 II(ImplicitOps | Stack, em_popf, popf),
4359 I(ImplicitOps, em_sahf), I(ImplicitOps, em_lahf),
4361 I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
4362 I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
4363 I2bv(SrcSI | DstDI | Mov | String, em_mov),
4364 F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r),
4366 F2bv(DstAcc | SrcImm | NoWrite, em_test),
4367 I2bv(SrcAcc | DstDI | Mov | String, em_mov),
4368 I2bv(SrcSI | DstAcc | Mov | String, em_mov),
4369 F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
4371 X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
4373 X8(I(DstReg | SrcImm64 | Mov, em_mov)),
4375 G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
4376 I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm),
4377 I(ImplicitOps | NearBranch, em_ret),
4378 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
4379 I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
4380 G(ByteOp, group11), G(0, group11),
4382 I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
4383 I(ImplicitOps | SrcImmU16, em_ret_far_imm),
4384 I(ImplicitOps, em_ret_far),
4385 D(ImplicitOps), DI(SrcImmByte, intn),
4386 D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
4388 G(Src2One | ByteOp, group2), G(Src2One, group2),
4389 G(Src2CL | ByteOp, group2), G(Src2CL, group2),
4390 I(DstAcc | SrcImmUByte | No64, em_aam),
4391 I(DstAcc | SrcImmUByte | No64, em_aad),
4392 F(DstAcc | ByteOp | No64, em_salc),
4393 I(DstAcc | SrcXLat | ByteOp, em_mov),
4395 N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
4397 X3(I(SrcImmByte | NearBranch, em_loop)),
4398 I(SrcImmByte | NearBranch, em_jcxz),
4399 I2bvIP(SrcImmUByte | DstAcc, em_in, in, check_perm_in),
4400 I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
4402 I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch),
4403 I(SrcImmFAddr | No64, em_jmp_far),
4404 D(SrcImmByte | ImplicitOps | NearBranch),
4405 I2bvIP(SrcDX | DstAcc, em_in, in, check_perm_in),
4406 I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
4408 N, DI(ImplicitOps, icebp), N, N,
4409 DI(ImplicitOps | Priv, hlt), D(ImplicitOps),
4410 G(ByteOp, group3), G(0, group3),
4412 D(ImplicitOps), D(ImplicitOps),
4413 I(ImplicitOps, em_cli), I(ImplicitOps, em_sti),
4414 D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
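/*
 * Note on table layout: opcode_table[] is indexed directly by the
 * primary opcode byte (0x00-0xff), sixteen entries per row;
 * twobyte_table[] below covers the 0x0f-escaped opcodes the same way.
 */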
4417 static const struct opcode twobyte_table[256] = {
4419 G(0, group6), GD(0, &group7), N, N,
4420 N, I(ImplicitOps | EmulateOnUD, em_syscall),
4421 II(ImplicitOps | Priv, em_clts, clts), N,
4422 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
4423 N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
4425 N, N, N, N, N, N, N, N,
4426 D(ImplicitOps | ModRM | SrcMem | NoAccess),
4427 N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
4429 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
4430 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
4431 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_cr_write, cr_write,
4433 IIP(ModRM | SrcMem | Priv | Op3264 | NoMod, em_dr_write, dr_write,
4436 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
4437 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
4438 N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b),
4441 II(ImplicitOps | Priv, em_wrmsr, wrmsr),
4442 IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
4443 II(ImplicitOps | Priv, em_rdmsr, rdmsr),
4444 IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
4445 I(ImplicitOps | EmulateOnUD, em_sysenter),
4446 I(ImplicitOps | Priv | EmulateOnUD, em_sysexit),
4448 N, N, N, N, N, N, N, N,
4450 X16(D(DstReg | SrcMem | ModRM)),
4452 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4457 N, N, N, GP(SrcMem | DstReg | ModRM | Mov, &pfx_0f_6f_0f_7f),
4462 N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
4464 X16(D(SrcImm | NearBranch)),
4466 X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
4468 I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg),
4469 II(ImplicitOps, em_cpuid, cpuid),
4470 F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt),
4471 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld),
4472 F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
4474 I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
4475 II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
4476 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
4477 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
4478 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
4479 GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
4481 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
4482 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
4483 F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
4484 I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
4485 I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
4486 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4490 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
4491 I(DstReg | SrcMem | ModRM, em_bsf_c),
4492 I(DstReg | SrcMem | ModRM, em_bsr_c),
4493 D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
4495 F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
4496 N, ID(0, &instr_dual_0f_c3),
4497 N, N, N, GD(0, &group9),
4499 X8(I(DstReg, em_bswap)),
4501 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
4503 N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7),
4504 N, N, N, N, N, N, N, N,
4506 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
4509 static const struct instr_dual instr_dual_0f_38_f0 = {
4510 I(DstReg | SrcMem | Mov, em_movbe), N
4513 static const struct instr_dual instr_dual_0f_38_f1 = {
4514 I(DstMem | SrcReg | Mov, em_movbe), N
4517 static const struct gprefix three_byte_0f_38_f0 = {
4518 ID(0, &instr_dual_0f_38_f0), N, N, N
4521 static const struct gprefix three_byte_0f_38_f1 = {
4522 ID(0, &instr_dual_0f_38_f1), N, N, N
4526 * Insns below are selected by the prefix which is indexed by the
4527 * third opcode byte.
4529 static const struct opcode opcode_map_0f_38[256] = {
4531 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4533 X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
4535 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
4536 GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
4557 static unsigned imm_size(struct x86_emulate_ctxt *ctxt)
4561 size = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
4562 if (size == 8)
4563 size = 4;
4564 return size;
4567 static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
4568 unsigned size, bool sign_extension)
4570 int rc = X86EMUL_CONTINUE;
4574 op->addr.mem.ea = ctxt->_eip;
4575 /* NB. Immediates are sign-extended as necessary. */
4576 switch (op->bytes) {
4577 case 1:
4578 op->val = insn_fetch(s8, ctxt);
4579 break;
4580 case 2:
4581 op->val = insn_fetch(s16, ctxt);
4582 break;
4583 case 4:
4584 op->val = insn_fetch(s32, ctxt);
4585 break;
4586 case 8:
4587 op->val = insn_fetch(s64, ctxt);
4588 break;
4589 }
4590 if (!sign_extension) {
4591 switch (op->bytes) {
4592 case 1:
4593 op->val &= 0xff;
4594 break;
4595 case 2:
4596 op->val &= 0xffff;
4597 break;
4598 case 4:
4599 op->val &= 0xffffffff;
4600 break;
4601 }
4602 }
static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
			  unsigned d)
{
	int rc = X86EMUL_CONTINUE;

	switch (d) {
	case OpReg:
		decode_register_operand(ctxt, op);
		break;
	case OpImmUByte:
		rc = decode_imm(ctxt, op, 1, false);
		break;
	case OpMem:
		ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
	mem_common:
		*op = ctxt->memop;
		ctxt->memopp = op;
		if (ctxt->d & BitOp)
			fetch_bit_operand(ctxt);
		op->orig_val = op->val;
		break;
	case OpMem64:
		ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
		goto mem_common;
	case OpAcc:
		op->type = OP_REG;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
		fetch_register_operand(op);
		op->orig_val = op->val;
		break;
	case OpAccLo:
		op->type = OP_REG;
		op->bytes = (ctxt->d & ByteOp) ? 2 : ctxt->op_bytes;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
		fetch_register_operand(op);
		op->orig_val = op->val;
		break;
	case OpAccHi:
		if (ctxt->d & ByteOp) {
			op->type = OP_NONE;
			break;
		}
		op->type = OP_REG;
		op->bytes = ctxt->op_bytes;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
		fetch_register_operand(op);
		op->orig_val = op->val;
		break;
	case OpDI:
		op->type = OP_MEM;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.mem.ea = register_address(ctxt, VCPU_REGS_RDI);
		op->addr.mem.seg = VCPU_SREG_ES;
		op->val = 0;
		op->count = 1;
		break;
	case OpDX:
		op->type = OP_REG;
		op->bytes = 2;
		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
		fetch_register_operand(op);
		break;
	case OpCL:
		op->type = OP_IMM;
		op->bytes = 1;
		op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
		break;
	case OpImmByte:
		rc = decode_imm(ctxt, op, 1, true);
		break;
	case OpOne:
		op->type = OP_IMM;
		op->bytes = 1;
		op->val = 1;
		break;
	case OpImm:
		rc = decode_imm(ctxt, op, imm_size(ctxt), true);
		break;
	case OpImm64:
		rc = decode_imm(ctxt, op, ctxt->op_bytes, true);
		break;
	case OpMem8:
		ctxt->memop.bytes = 1;
		if (ctxt->memop.type == OP_REG) {
			ctxt->memop.addr.reg = decode_register(ctxt,
					ctxt->modrm_rm, true);
			fetch_register_operand(&ctxt->memop);
		}
		goto mem_common;
	case OpMem16:
		ctxt->memop.bytes = 2;
		goto mem_common;
	case OpMem32:
		ctxt->memop.bytes = 4;
		goto mem_common;
	case OpImmU16:
		rc = decode_imm(ctxt, op, 2, false);
		break;
	case OpImmU:
		rc = decode_imm(ctxt, op, imm_size(ctxt), false);
		break;
	case OpSI:
		op->type = OP_MEM;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.mem.ea = register_address(ctxt, VCPU_REGS_RSI);
		op->addr.mem.seg = ctxt->seg_override;
		op->val = 0;
		op->count = 1;
		break;
	case OpXLat:
		op->type = OP_MEM;
		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
		op->addr.mem.ea =
			address_mask(ctxt,
				reg_read(ctxt, VCPU_REGS_RBX) +
				(reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
		op->addr.mem.seg = ctxt->seg_override;
		op->val = 0;
		break;
	case OpImmFAddr:
		op->type = OP_IMM;
		op->addr.mem.ea = ctxt->_eip;
		op->bytes = ctxt->op_bytes + 2;
		insn_fetch_arr(op->valptr, op->bytes, ctxt);
		break;
	case OpMemFAddr:
		ctxt->memop.bytes = ctxt->op_bytes + 2;
		goto mem_common;
	case OpES: op->type = OP_IMM; op->val = VCPU_SREG_ES; break;
	case OpCS: op->type = OP_IMM; op->val = VCPU_SREG_CS; break;
	case OpSS: op->type = OP_IMM; op->val = VCPU_SREG_SS; break;
	case OpDS: op->type = OP_IMM; op->val = VCPU_SREG_DS; break;
	case OpFS: op->type = OP_IMM; op->val = VCPU_SREG_FS; break;
	case OpGS: op->type = OP_IMM; op->val = VCPU_SREG_GS; break;
	case OpImplicit:
		/* Special instructions do their own operand decoding. */
	default:
		op->type = OP_NONE; /* Disable writeback. */
		break;
	}

done:
	return rc;
}

int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
{
	int rc = X86EMUL_CONTINUE;
	int mode = ctxt->mode;
	int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
	bool op_prefix = false;
	bool has_seg_override = false;
	struct opcode opcode;

	ctxt->memop.type = OP_NONE;
	ctxt->memopp = NULL;
	ctxt->_eip = ctxt->eip;
	ctxt->fetch.ptr = ctxt->fetch.data;
	ctxt->fetch.end = ctxt->fetch.data + insn_len;
	ctxt->opcode_len = 1;
	if (insn_len > 0)
		memcpy(ctxt->fetch.data, insn, insn_len);
	else {
		rc = __do_insn_fetch_bytes(ctxt, 1);
		if (rc != X86EMUL_CONTINUE)
			return rc;
	}
	switch (mode) {
	case X86EMUL_MODE_REAL:
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
		def_op_bytes = def_ad_bytes = 2;
		break;
	case X86EMUL_MODE_PROT32:
		def_op_bytes = def_ad_bytes = 4;
		break;
#ifdef CONFIG_X86_64
	case X86EMUL_MODE_PROT64:
		def_op_bytes = 4;
		def_ad_bytes = 8;
		break;
#endif
	default:
		return EMULATION_FAILED;
	}

	ctxt->op_bytes = def_op_bytes;
	ctxt->ad_bytes = def_ad_bytes;
	/* Legacy prefixes. */
	for (;;) {
		switch (ctxt->b = insn_fetch(u8, ctxt)) {
		case 0x66:	/* operand-size override */
			op_prefix = true;
			/* switch between 2/4 bytes */
			ctxt->op_bytes = def_op_bytes ^ 6;
			break;
		case 0x67:	/* address-size override */
			if (mode == X86EMUL_MODE_PROT64)
				/* switch between 4/8 bytes */
				ctxt->ad_bytes = def_ad_bytes ^ 12;
			else
				/* switch between 2/4 bytes */
				ctxt->ad_bytes = def_ad_bytes ^ 6;
			break;
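			/*
			 * The XOR above toggles between the two legal sizes:
			 * 2 ^ 6 = 4 and 4 ^ 6 = 2 for the operand size,
			 * 8 ^ 12 = 4 for the 64-bit address size.  E.g. a 66
			 * prefix in 32-bit code (default 4) selects 16-bit
			 * operands, while in 16-bit code it selects 32-bit
			 * operands.
			 */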
		case 0x26:	/* ES override */
		case 0x2e:	/* CS override */
		case 0x36:	/* SS override */
		case 0x3e:	/* DS override */
			has_seg_override = true;
			ctxt->seg_override = (ctxt->b >> 3) & 3;
			break;
		case 0x64:	/* FS override */
		case 0x65:	/* GS override */
			has_seg_override = true;
			ctxt->seg_override = ctxt->b & 7;
			break;
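		/*
		 * Both computations above map the prefix byte straight to
		 * the VCPU_SREG_* numbering: 0x26/0x2e/0x36/0x3e give
		 * (b >> 3) & 3 = 0..3 (ES, CS, SS, DS) and 0x64/0x65 give
		 * b & 7 = 4/5 (FS, GS).
		 */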
		case 0x40 ... 0x4f: /* REX */
			if (mode != X86EMUL_MODE_PROT64)
				goto done_prefixes;
			ctxt->rex_prefix = ctxt->b;
			continue;
		case 0xf0:	/* LOCK */
			ctxt->lock_prefix = 1;
			break;
		case 0xf2:	/* REPNE/REPNZ */
		case 0xf3:	/* REP/REPE/REPZ */
			ctxt->rep_prefix = ctxt->b;
			break;
		default:
			goto done_prefixes;
		}

		/* Any legacy prefix after a REX prefix nullifies its effect. */
		ctxt->rex_prefix = 0;
	}

done_prefixes:
	/* REX prefix. */
	if (ctxt->rex_prefix & 8)
		ctxt->op_bytes = 8;	/* REX.W */
	/* Opcode byte(s). */
	opcode = opcode_table[ctxt->b];
	/* Two-byte opcode? */
	if (ctxt->b == 0x0f) {
		ctxt->opcode_len = 2;
		ctxt->b = insn_fetch(u8, ctxt);
		opcode = twobyte_table[ctxt->b];

		/* 0F_38 opcode map */
		if (ctxt->b == 0x38) {
			ctxt->opcode_len = 3;
			ctxt->b = insn_fetch(u8, ctxt);
			opcode = opcode_map_0f_38[ctxt->b];
		}
	}
	ctxt->d = opcode.flags;

	if (ctxt->d & ModRM)
		ctxt->modrm = insn_fetch(u8, ctxt);
	/* vex-prefix instructions are not implemented */
	if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
	    (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
		ctxt->d = NotImpl;
	}
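	/*
	 * The check above works because C4/C5 are the legacy LES/LDS
	 * opcodes, which are invalid in 64-bit mode and take only a memory
	 * operand in the other modes.  A following byte with mod == 3 (or
	 * any C4/C5 in long mode) can therefore only be a VEX prefix.
	 */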
	while (ctxt->d & GroupMask) {
		switch (ctxt->d & GroupMask) {
		case Group:
			goffset = (ctxt->modrm >> 3) & 7;
			opcode = opcode.u.group[goffset];
			break;
		case GroupDual:
			goffset = (ctxt->modrm >> 3) & 7;
			if ((ctxt->modrm >> 6) == 3)
				opcode = opcode.u.gdual->mod3[goffset];
			else
				opcode = opcode.u.gdual->mod012[goffset];
			break;
		case RMExt:
			goffset = ctxt->modrm & 7;
			opcode = opcode.u.group[goffset];
			break;
		case Prefix:
			if (ctxt->rep_prefix && op_prefix)
				return EMULATION_FAILED;
			simd_prefix = op_prefix ? 0x66 : ctxt->rep_prefix;
			switch (simd_prefix) {
			case 0x00: opcode = opcode.u.gprefix->pfx_no; break;
			case 0x66: opcode = opcode.u.gprefix->pfx_66; break;
			case 0xf2: opcode = opcode.u.gprefix->pfx_f2; break;
			case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break;
			}
			break;
		case Escape:
			if (ctxt->modrm > 0xbf)
				opcode = opcode.u.esc->high[ctxt->modrm - 0xc0];
			else
				opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
			break;
		case InstrDual:
			if ((ctxt->modrm >> 6) == 3)
				opcode = opcode.u.idual->mod3;
			else
				opcode = opcode.u.idual->mod012;
			break;
		case ModeDual:
			if (ctxt->mode == X86EMUL_MODE_PROT64)
				opcode = opcode.u.mdual->mode64;
			else
				opcode = opcode.u.mdual->mode32;
			break;
		default:
			return EMULATION_FAILED;
		}

		ctxt->d &= ~(u64)GroupMask;
		ctxt->d |= opcode.flags;
	}
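	/*
	 * As a worked example, take ModRM byte 0xe6: for a Group entry the
	 * selector is the reg field, (0xe6 >> 3) & 7 = 4; for an RMExt
	 * entry it is the rm field, 0xe6 & 7 = 6; and since mod ==
	 * (0xe6 >> 6) == 3, a GroupDual entry would pick mod3[4] rather
	 * than mod012[4].
	 */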
	/* Unrecognised? */
	if (ctxt->d == 0)
		return EMULATION_FAILED;

	ctxt->execute = opcode.u.execute;

	if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD)))
		return EMULATION_FAILED;
	if (unlikely(ctxt->d &
	    (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
	     No16))) {
		/*
		 * These are copied unconditionally here, and checked
		 * unconditionally in x86_emulate_insn.
		 */
		ctxt->check_perm = opcode.check_perm;
		ctxt->intercept = opcode.intercept;

		if (ctxt->d & NotImpl)
			return EMULATION_FAILED;

		if (mode == X86EMUL_MODE_PROT64) {
			if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
				ctxt->op_bytes = 8;
			else if (ctxt->d & NearBranch)
				ctxt->op_bytes = 8;
		}

		if (ctxt->d & Op3264) {
			if (mode == X86EMUL_MODE_PROT64)
				ctxt->op_bytes = 8;
			else
				ctxt->op_bytes = 4;
		}

		if ((ctxt->d & No16) && ctxt->op_bytes == 2)
			ctxt->op_bytes = 4;

		if (ctxt->d & Sse)
			ctxt->op_bytes = 16;
		else if (ctxt->d & Mmx)
			ctxt->op_bytes = 8;
	}
	/* ModRM and SIB bytes. */
	if (ctxt->d & ModRM) {
		rc = decode_modrm(ctxt, &ctxt->memop);
		if (!has_seg_override) {
			has_seg_override = true;
			ctxt->seg_override = ctxt->modrm_seg;
		}
	} else if (ctxt->d & MemAbs)
		rc = decode_abs(ctxt, &ctxt->memop);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	if (!has_seg_override)
		ctxt->seg_override = VCPU_SREG_DS;

	ctxt->memop.addr.mem.seg = ctxt->seg_override;
	/*
	 * Decode and fetch the source operand: register, memory
	 * or immediate.
	 */
	rc = decode_operand(ctxt, &ctxt->src, (ctxt->d >> SrcShift) & OpMask);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	/*
	 * Decode and fetch the second source operand: register, memory
	 * or immediate.
	 */
	rc = decode_operand(ctxt, &ctxt->src2, (ctxt->d >> Src2Shift) & OpMask);
	if (rc != X86EMUL_CONTINUE)
		goto done;
	/* Decode and fetch the destination operand: register or memory. */
	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
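
	/*
	 * With RIP-relative addressing the displacement is relative to the
	 * address of the *next* instruction, so the effective address can
	 * only be fixed up now that decode has consumed the whole
	 * instruction and _eip points past it.  E.g. "mov rax, [rip+0x10]"
	 * reads from the end of the instruction plus 0x10.
	 */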
	if (ctxt->rip_relative && likely(ctxt->memopp))
		ctxt->memopp->addr.mem.ea = address_mask(ctxt,
					ctxt->memopp->addr.mem.ea + ctxt->_eip);

done:
	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
}

bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt)
{
	return ctxt->d & PageTable;
}

static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
{
	/*
	 * The second termination condition applies only to REPE and REPNE.
	 * If the repeat prefix is REPE/REPZ or REPNE/REPNZ, check the
	 * corresponding termination condition:
	 *	- if REPE/REPZ and ZF = 0 then done
	 *	- if REPNE/REPNZ and ZF = 1 then done
	 */
	if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||	/* cmps */
	     (ctxt->b == 0xae) || (ctxt->b == 0xaf))	/* scas */
	    && (((ctxt->rep_prefix == REPE_PREFIX) &&
		 ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
		|| ((ctxt->rep_prefix == REPNE_PREFIX) &&
		    ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
		return true;

	return false;
}

static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
{
	u8 fault = 0;

	ctxt->ops->get_fpu(ctxt);
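	/*
	 * fwait raises any exception pending in the x87 status word.  If
	 * one fires, the exception table entry below routes it to the
	 * fixup code, which records the fault instead of letting the
	 * guest's pending #MF be delivered on the host.
	 */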
	asm volatile("1: fwait \n\t"
		     "2: \n\t"
		     ".pushsection .fixup,\"ax\" \n\t"
		     "3: \n\t"
		     "movb $1, %[fault] \n\t"
		     "jmp 2b \n\t"
		     ".popsection \n\t"
		     _ASM_EXTABLE(1b, 3b)
		     : [fault]"+qm"(fault));
	ctxt->ops->put_fpu(ctxt);

	if (unlikely(fault))
		return emulate_exception(ctxt, MF_VECTOR, 0, false);

	return X86EMUL_CONTINUE;
}

static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
				       struct operand *op)
{
	if (op->type == OP_MM)
		read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
}

static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
{
	register void *__sp asm(_ASM_SP);
	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;

	if (!(ctxt->d & ByteOp))
		fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
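
	/*
	 * Each fastop routine is laid out as a table of FASTOP_SIZE-byte
	 * stubs, one per operand size, so the adjustment above indexes the
	 * 2/4/8-byte variant (__ffs gives 1, 2 or 3); the byte variant at
	 * offset 0 is used as-is.  The asm below loads the guest's
	 * arithmetic flags, calls the stub with dst/src/src2 in
	 * rax/rdx/rcx, and captures the resulting flags again.
	 */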
	asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
	    : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
	      [fastop]"+S"(fop), "+r"(__sp)
	    : "c"(ctxt->src2.val));

	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
	if (!fop) /* exception is returned in fop variable */
		return emulate_de(ctxt);
	return X86EMUL_CONTINUE;
}

void init_decode_cache(struct x86_emulate_ctxt *ctxt)
{
	memset(&ctxt->rip_relative, 0,
	       (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);

	ctxt->io_read.pos = 0;
	ctxt->io_read.end = 0;
	ctxt->mem_read.end = 0;
}

int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
	const struct x86_emulate_ops *ops = ctxt->ops;
	int rc = X86EMUL_CONTINUE;
	int saved_dst_type = ctxt->dst.type;

	ctxt->mem_read.pos = 0;
	/* LOCK prefix is allowed only with some instructions */
	if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
		rc = emulate_ud(ctxt);
		goto done;
	}

	if ((ctxt->d & SrcMask) == SrcMemFAddr && ctxt->src.type != OP_MEM) {
		rc = emulate_ud(ctxt);
		goto done;
	}
	if (unlikely(ctxt->d &
		     (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
		if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
				(ctxt->d & Undefined)) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
		    || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
			rc = emulate_ud(ctxt);
			goto done;
		}

		if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
			rc = emulate_nm(ctxt);
			goto done;
		}

		if (ctxt->d & Mmx) {
			rc = flush_pending_x87_faults(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			/*
			 * Now that we know the fpu is exception safe, we can fetch
			 * operands from it.
			 */
			fetch_possible_mmx_operand(ctxt, &ctxt->src);
			fetch_possible_mmx_operand(ctxt, &ctxt->src2);
			if (!(ctxt->d & Mov))
				fetch_possible_mmx_operand(ctxt, &ctxt->dst);
		}
		if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_PRE_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		/* Instruction can only be executed in protected mode */
		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
			rc = emulate_ud(ctxt);
			goto done;
		}
		/* Privileged instruction can be executed only in CPL=0 */
		if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
			if (ctxt->d & PrivUD)
				rc = emulate_ud(ctxt);
			else
				rc = emulate_gp(ctxt, 0);
			goto done;
		}

		/* Do instruction specific permission checks */
		if (ctxt->d & CheckPerm) {
			rc = ctxt->check_perm(ctxt);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}

		if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
			rc = emulator_check_intercept(ctxt, ctxt->intercept,
						      X86_ICPT_POST_EXCEPT);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}
		if (ctxt->rep_prefix && (ctxt->d & String)) {
			/* All REP prefixes have the same first termination condition */
			if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
				string_registers_quirk(ctxt);
				ctxt->eip = ctxt->_eip;
				ctxt->eflags &= ~X86_EFLAGS_RF;
				goto done;
			}
		}
	}

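	/*
	 * Note the address_mask() in the REP check above: the count is
	 * truncated to the address size, so with a 16-bit address size
	 * only CX, not the whole of ECX/RCX, decides whether the repeat
	 * terminates immediately.
	 */
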
	if ((ctxt->src.type == OP_MEM) && !(ctxt->d & NoAccess)) {
		rc = segmented_read(ctxt, ctxt->src.addr.mem,
				    ctxt->src.valptr, ctxt->src.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		ctxt->src.orig_val64 = ctxt->src.val64;
	}

	if (ctxt->src2.type == OP_MEM) {
		rc = segmented_read(ctxt, ctxt->src2.addr.mem,
				    &ctxt->src2.val, ctxt->src2.bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if ((ctxt->d & DstMask) == ImplicitOps)
		goto special_insn;

	if ((ctxt->dst.type == OP_MEM) && !(ctxt->d & Mov)) {
		/* optimisation - avoid slow emulated read if Mov */
		rc = segmented_read(ctxt, ctxt->dst.addr.mem,
				   &ctxt->dst.val, ctxt->dst.bytes);
		if (rc != X86EMUL_CONTINUE) {
			if (!(ctxt->d & NoWrite) &&
			    rc == X86EMUL_PROPAGATE_FAULT &&
			    ctxt->exception.vector == PF_VECTOR)
				ctxt->exception.error_code |= PFERR_WRITE_MASK;
			goto done;
		}
	}
	/* Copy full 64-bit value for CMPXCHG8B. */
	ctxt->dst.orig_val64 = ctxt->dst.val64;

special_insn:

	if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
		rc = emulator_check_intercept(ctxt, ctxt->intercept,
					      X86_ICPT_POST_MEMACCESS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if (ctxt->rep_prefix && (ctxt->d & String))
		ctxt->eflags |= X86_EFLAGS_RF;
	else
		ctxt->eflags &= ~X86_EFLAGS_RF;
	if (ctxt->execute) {
		if (ctxt->d & Fastop) {
			void (*fop)(struct fastop *) = (void *)ctxt->execute;
			rc = fastop(ctxt, fop);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			goto writeback;
		}
		rc = ctxt->execute(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		goto writeback;
	}

	if (ctxt->opcode_len == 2)
		goto twobyte_insn;
	else if (ctxt->opcode_len == 3)
		goto threebyte_insn;

	switch (ctxt->b) {
	case 0x70 ... 0x7f: /* jcc (short) */
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x8d: /* lea r16/r32, m */
		ctxt->dst.val = ctxt->src.addr.mem.ea;
		break;
	case 0x90 ... 0x97: /* nop / xchg reg, rax */
		if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
			ctxt->dst.type = OP_NONE;
		else
			rc = em_xchg(ctxt);
		break;
	case 0x98: /* cbw/cwde/cdqe */
		switch (ctxt->op_bytes) {
		case 2: ctxt->dst.val = (s8)ctxt->dst.val; break;
		case 4: ctxt->dst.val = (s16)ctxt->dst.val; break;
		case 8: ctxt->dst.val = (s32)ctxt->dst.val; break;
		}
		break;
	case 0xcc: /* int3 */
		rc = emulate_int(ctxt, 3);
		break;
	case 0xcd: /* int n */
		rc = emulate_int(ctxt, ctxt->src.val);
		break;
	case 0xce: /* into */
		if (ctxt->eflags & X86_EFLAGS_OF)
			rc = emulate_int(ctxt, 4);
		break;
	case 0xe9: /* jmp rel */
	case 0xeb: /* jmp rel short */
		rc = jmp_rel(ctxt, ctxt->src.val);
		ctxt->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xf4: /* hlt */
		ctxt->ops->halt(ctxt);
		break;
	case 0xf5: /* cmc */
		/* complement carry flag from eflags reg */
		ctxt->eflags ^= X86_EFLAGS_CF;
		break;
	case 0xf8: /* clc */
		ctxt->eflags &= ~X86_EFLAGS_CF;
		break;
	case 0xf9: /* stc */
		ctxt->eflags |= X86_EFLAGS_CF;
		break;
	case 0xfc: /* cld */
		ctxt->eflags &= ~X86_EFLAGS_DF;
		break;
	case 0xfd: /* std */
		ctxt->eflags |= X86_EFLAGS_DF;
		break;
	default:
		goto cannot_emulate;
	}
	if (rc != X86EMUL_CONTINUE)
		goto done;

writeback:
	if (ctxt->d & SrcWrite) {
		BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
		rc = writeback(ctxt, &ctxt->src);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}
	if (!(ctxt->d & NoWrite)) {
		rc = writeback(ctxt, &ctxt->dst);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}
	/*
	 * Restore dst type in case the decoding is reused
	 * (happens for string instructions).
	 */
	ctxt->dst.type = saved_dst_type;

	if ((ctxt->d & SrcMask) == SrcSI)
		string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);

	if ((ctxt->d & DstMask) == DstDI)
		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
	if (ctxt->rep_prefix && (ctxt->d & String)) {
		unsigned int count;
		struct read_cache *r = &ctxt->io_read;

		if ((ctxt->d & SrcMask) == SrcSI)
			count = ctxt->src.count;
		else
			count = ctxt->dst.count;
		register_address_increment(ctxt, VCPU_REGS_RCX, -count);
		if (!string_insn_completed(ctxt)) {
			/*
			 * Re-enter guest when pio read ahead buffer is empty
			 * or, if it is not used, after every 1024 iterations.
			 */
			if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
			    (r->end == 0 || r->end != r->pos)) {
				/*
				 * Reset read cache. Usually happens before
				 * decode, but since instruction is restarted
				 * we have to do it here.
				 */
				ctxt->mem_read.end = 0;
				writeback_registers(ctxt);
				return EMULATION_RESTART;
			}
			goto done; /* skip rip writeback */
		}
		ctxt->eflags &= ~X86_EFLAGS_RF;
	}
	ctxt->eip = ctxt->_eip;

done:
	if (rc == X86EMUL_PROPAGATE_FAULT) {
		WARN_ON(ctxt->exception.vector > 0x1f);
		ctxt->have_exception = true;
	}
	if (rc == X86EMUL_INTERCEPTED)
		return EMULATION_INTERCEPTED;

	if (rc == X86EMUL_CONTINUE)
		writeback_registers(ctxt);

	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;

twobyte_insn:
	switch (ctxt->b) {
	case 0x09: /* wbinvd */
		(ctxt->ops->wbinvd)(ctxt);
		break;
	case 0x08: /* invd */
	case 0x0d: /* GrpP (prefetch) */
	case 0x18: /* Grp16 (prefetch/nop) */
	case 0x1f: /* nop */
		break;
	case 0x20: /* mov cr, reg */
		ctxt->dst.val = ops->get_cr(ctxt, ctxt->modrm_reg);
		break;
	case 0x21: /* mov from dr to reg */
		ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
		break;
	case 0x40 ... 0x4f: /* cmov */
		if (test_cc(ctxt->b, ctxt->eflags))
			ctxt->dst.val = ctxt->src.val;
		else if (ctxt->op_bytes != 4)
			ctxt->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x80 ... 0x8f: /* jcc (near) */
		if (test_cc(ctxt->b, ctxt->eflags))
			rc = jmp_rel(ctxt, ctxt->src.val);
		break;
	case 0x90 ... 0x9f: /* setcc r/m8 */
		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
		break;
	case 0xb6 ... 0xb7: /* movzx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
						       : (u16) ctxt->src.val;
		break;
	case 0xbe ... 0xbf: /* movsx */
		ctxt->dst.bytes = ctxt->op_bytes;
		ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
							 (s16) ctxt->src.val;
		break;
	default:
		goto cannot_emulate;
	}
threebyte_insn:

	if (rc != X86EMUL_CONTINUE)
		goto done;

	goto writeback;

cannot_emulate:
	return EMULATION_FAILED;
}

void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
{
	invalidate_registers(ctxt);
}

void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
{
	writeback_registers(ctxt);
}