KVM: emulate: simplify writeback
[sfrench/cifs-2.6.git] / arch / x86 / kvm / emulate.c
index e4e833d3d7d7bb826907f7053d56c1c45748e476..32d3da82da2e6dfa1dc227f695e43bc801c8844c 100644 (file)
 #define NoWrite     ((u64)1 << 45)  /* No writeback */
 #define SrcWrite    ((u64)1 << 46)  /* Write back src operand */
 #define NoMod      ((u64)1 << 47)  /* Mod field is ignored */
+#define Intercept   ((u64)1 << 48)  /* Has valid intercept field */
+#define CheckPerm   ((u64)1 << 49)  /* Has valid check_perm field */
 
 #define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
 
@@ -426,6 +428,7 @@ static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
                .modrm_reg  = ctxt->modrm_reg,
                .modrm_rm   = ctxt->modrm_rm,
                .src_val    = ctxt->src.val64,
+               .dst_val    = ctxt->dst.val64,
                .src_bytes  = ctxt->src.bytes,
                .dst_bytes  = ctxt->dst.bytes,
                .ad_bytes   = ctxt->ad_bytes,
@@ -1093,7 +1096,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
                if (ctxt->d & Mmx) {
                        op->type = OP_MM;
                        op->bytes = 8;
-                       op->addr.xmm = ctxt->modrm_rm & 7;
+                       op->addr.mm = ctxt->modrm_rm & 7;
                        return rc;
                }
                fetch_register_operand(op);
@@ -1220,12 +1223,14 @@ static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
        long sv = 0, mask;
 
        if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
-               mask = ~(ctxt->dst.bytes * 8 - 1);
+               mask = ~((long)ctxt->dst.bytes * 8 - 1);
 
                if (ctxt->src.bytes == 2)
                        sv = (s16)ctxt->src.val & (s16)mask;
                else if (ctxt->src.bytes == 4)
                        sv = (s32)ctxt->src.val & (s32)mask;
+               else
+                       sv = (s64)ctxt->src.val & (s64)mask;
 
                ctxt->dst.addr.mem.ea += (sv >> 3);
        }
@@ -1358,17 +1363,19 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
                                     u16 selector, struct desc_ptr *dt)
 {
        const struct x86_emulate_ops *ops = ctxt->ops;
+       u32 base3 = 0;
 
        if (selector & 1 << 2) {
                struct desc_struct desc;
                u16 sel;
 
                memset (dt, 0, sizeof *dt);
-               if (!ops->get_segment(ctxt, &sel, &desc, NULL, VCPU_SREG_LDTR))
+               if (!ops->get_segment(ctxt, &sel, &desc, &base3,
+                                     VCPU_SREG_LDTR))
                        return;
 
                dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
-               dt->address = get_desc_base(&desc);
+               dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
        } else
                ops->get_gdt(ctxt, dt);
 }
@@ -1422,6 +1429,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
        ulong desc_addr;
        int ret;
        u16 dummy;
+       u32 base3 = 0;
 
        memset(&seg_desc, 0, sizeof seg_desc);
 
@@ -1538,9 +1546,14 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                ret = write_segment_descriptor(ctxt, selector, &seg_desc);
                if (ret != X86EMUL_CONTINUE)
                        return ret;
+       } else if (ctxt->mode == X86EMUL_MODE_PROT64) {
+               ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3,
+                               sizeof(base3), &ctxt->exception);
+               if (ret != X86EMUL_CONTINUE)
+                       return ret;
        }
 load:
-       ctxt->ops->set_segment(ctxt, selector, &seg_desc, 0, seg);
+       ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
        return X86EMUL_CONTINUE;
 exception:
        emulate_exception(ctxt, err_vec, err_code, true);
@@ -1575,34 +1588,28 @@ static void write_register_operand(struct operand *op)
 
 static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
 {
-       int rc;
-
        switch (op->type) {
        case OP_REG:
                write_register_operand(op);
                break;
        case OP_MEM:
                if (ctxt->lock_prefix)
-                       rc = segmented_cmpxchg(ctxt,
+                       return segmented_cmpxchg(ctxt,
+                                                op->addr.mem,
+                                                &op->orig_val,
+                                                &op->val,
+                                                op->bytes);
+               else
+                       return segmented_write(ctxt,
                                               op->addr.mem,
-                                              &op->orig_val,
                                               &op->val,
                                               op->bytes);
-               else
-                       rc = segmented_write(ctxt,
-                                            op->addr.mem,
-                                            &op->val,
-                                            op->bytes);
-               if (rc != X86EMUL_CONTINUE)
-                       return rc;
                break;
        case OP_MEM_STR:
-               rc = segmented_write(ctxt,
-                               op->addr.mem,
-                               op->data,
-                               op->bytes * op->count);
-               if (rc != X86EMUL_CONTINUE)
-                       return rc;
+               return segmented_write(ctxt,
+                                      op->addr.mem,
+                                      op->data,
+                                      op->bytes * op->count);
                break;
        case OP_XMM:
                write_sse_reg(ctxt, &op->vec_val, op->addr.xmm);
@@ -1754,6 +1761,9 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
        if (rc != X86EMUL_CONTINUE)
                return rc;
 
+       if (ctxt->modrm_reg == VCPU_SREG_SS)
+               ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
+
        rc = load_segment_descriptor(ctxt, (u16)selector, seg);
        return rc;
 }
@@ -1991,6 +2001,9 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
 {
        u64 old = ctxt->dst.orig_val64;
 
+       if (ctxt->dst.bytes == 16)
+               return X86EMUL_UNHANDLEABLE;
+
        if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
            ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
                *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
@@ -2017,6 +2030,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
        int rc;
        unsigned long cs;
+       int cpl = ctxt->ops->cpl(ctxt);
 
        rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
@@ -2026,6 +2040,9 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
        rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;
+       /* Outer-privilege level return is not implemented */
+       if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
+               return X86EMUL_UNHANDLEABLE;
        rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
        return rc;
 }
@@ -2044,8 +2061,10 @@ static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
 static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
 {
        /* Save real source value, then compare EAX against destination. */
+       ctxt->dst.orig_val = ctxt->dst.val;
+       ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
        ctxt->src.orig_val = ctxt->src.val;
-       ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX);
+       ctxt->src.val = ctxt->dst.orig_val;
        fastop(ctxt, em_cmp);
 
        if (ctxt->eflags & EFLG_ZF) {
@@ -2055,6 +2074,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
                /* Failure: write the value we saw to EAX. */
                ctxt->dst.type = OP_REG;
                ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
+               ctxt->dst.val = ctxt->dst.orig_val;
        }
        return X86EMUL_CONTINUE;
 }
@@ -2964,7 +2984,7 @@ static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
 
 static int em_mov(struct x86_emulate_ctxt *ctxt)
 {
-       memcpy(ctxt->dst.valptr, ctxt->src.valptr, ctxt->op_bytes);
+       memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
        return X86EMUL_CONTINUE;
 }
 
@@ -3221,7 +3241,8 @@ static int em_lidt(struct x86_emulate_ctxt *ctxt)
 
 static int em_smsw(struct x86_emulate_ctxt *ctxt)
 {
-       ctxt->dst.bytes = 2;
+       if (ctxt->dst.type == OP_MEM)
+               ctxt->dst.bytes = 2;
        ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
        return X86EMUL_CONTINUE;
 }
@@ -3496,7 +3517,7 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
        u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
 
        if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
-           (rcx > 3))
+           ctxt->ops->check_pmc(ctxt, rcx))
                return emulate_gp(ctxt, 0);
 
        return X86EMUL_CONTINUE;
@@ -3521,9 +3542,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 }
 
 #define D(_y) { .flags = (_y) }
-#define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i }
-#define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \
-                     .check_perm = (_p) }
+#define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
+#define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
+                     .intercept = x86_intercept_##_i, .check_perm = (_p) }
 #define N    D(NotImpl)
 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
@@ -3532,10 +3553,10 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
 #define II(_f, _e, _i) \
-       { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i }
+       { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
 #define IIP(_f, _e, _i, _p) \
-       { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i, \
-         .check_perm = (_p) }
+       { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
+         .intercept = x86_intercept_##_i, .check_perm = (_p) }
 #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
 
 #define D2bv(_f)      D((_f) | ByteOp), D(_f)
@@ -3634,8 +3655,8 @@ static const struct opcode group6[] = {
 };
 
 static const struct group_dual group7 = { {
-       II(Mov | DstMem | Priv,                 em_sgdt, sgdt),
-       II(Mov | DstMem | Priv,                 em_sidt, sidt),
+       II(Mov | DstMem,                        em_sgdt, sgdt),
+       II(Mov | DstMem,                        em_sidt, sidt),
        II(SrcMem | Priv,                       em_lgdt, lgdt),
        II(SrcMem | Priv,                       em_lidt, lidt),
        II(SrcNone | DstMem | Mov,              em_smsw, smsw), N,
@@ -3899,7 +3920,7 @@ static const struct opcode twobyte_table[256] = {
        N, N,
        N, N, N, N, N, N, N, N,
        /* 0x40 - 0x4F */
-       X16(D(DstReg | SrcMem | ModRM | Mov)),
+       X16(D(DstReg | SrcMem | ModRM)),
        /* 0x50 - 0x5F */
        N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
        /* 0x60 - 0x6F */
@@ -4061,12 +4082,12 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
        mem_common:
                *op = ctxt->memop;
                ctxt->memopp = op;
-               if ((ctxt->d & BitOp) && op == &ctxt->dst)
+               if (ctxt->d & BitOp)
                        fetch_bit_operand(ctxt);
                op->orig_val = op->val;
                break;
        case OpMem64:
-               ctxt->memop.bytes = 8;
+               ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
                goto mem_common;
        case OpAcc:
                op->type = OP_REG;
@@ -4314,6 +4335,13 @@ done_prefixes:
        if (ctxt->d & ModRM)
                ctxt->modrm = insn_fetch(u8, ctxt);
 
+       /* vex-prefix instructions are not implemented */
+       if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
+           (mode == X86EMUL_MODE_PROT64 ||
+           (mode >= X86EMUL_MODE_PROT16 && (ctxt->modrm & 0x80)))) {
+               ctxt->d = NotImpl;
+       }
+
        while (ctxt->d & GroupMask) {
                switch (ctxt->d & GroupMask) {
                case Group:
@@ -4356,31 +4384,42 @@ done_prefixes:
                ctxt->d |= opcode.flags;
        }
 
-       ctxt->execute = opcode.u.execute;
-       ctxt->check_perm = opcode.check_perm;
-       ctxt->intercept = opcode.intercept;
-
        /* Unrecognised? */
-       if (ctxt->d == 0 || (ctxt->d & NotImpl))
+       if (ctxt->d == 0)
                return EMULATION_FAILED;
 
-       if (!(ctxt->d & EmulateOnUD) && ctxt->ud)
-               return EMULATION_FAILED;
+       ctxt->execute = opcode.u.execute;
 
-       if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
-               ctxt->op_bytes = 8;
+       if (unlikely(ctxt->d &
+                    (NotImpl|EmulateOnUD|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) {
+               /*
+                * These are copied unconditionally here, and checked unconditionally
+                * in x86_emulate_insn.
+                */
+               ctxt->check_perm = opcode.check_perm;
+               ctxt->intercept = opcode.intercept;
+
+               if (ctxt->d & NotImpl)
+                       return EMULATION_FAILED;
 
-       if (ctxt->d & Op3264) {
-               if (mode == X86EMUL_MODE_PROT64)
+               if (!(ctxt->d & EmulateOnUD) && ctxt->ud)
+                       return EMULATION_FAILED;
+
+               if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
                        ctxt->op_bytes = 8;
-               else
-                       ctxt->op_bytes = 4;
-       }
 
-       if (ctxt->d & Sse)
-               ctxt->op_bytes = 16;
-       else if (ctxt->d & Mmx)
-               ctxt->op_bytes = 8;
+               if (ctxt->d & Op3264) {
+                       if (mode == X86EMUL_MODE_PROT64)
+                               ctxt->op_bytes = 8;
+                       else
+                               ctxt->op_bytes = 4;
+               }
+
+               if (ctxt->d & Sse)
+                       ctxt->op_bytes = 16;
+               else if (ctxt->d & Mmx)
+                       ctxt->op_bytes = 8;
+       }
 
        /* ModRM and SIB bytes. */
        if (ctxt->d & ModRM) {
@@ -4503,12 +4542,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 
        ctxt->mem_read.pos = 0;
 
-       if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
-                       (ctxt->d & Undefined)) {
-               rc = emulate_ud(ctxt);
-               goto done;
-       }
-
        /* LOCK prefix is allowed only with some instructions */
        if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
                rc = emulate_ud(ctxt);
@@ -4520,69 +4553,78 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
                goto done;
        }
 
-       if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
-           || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
-               rc = emulate_ud(ctxt);
-               goto done;
-       }
-
-       if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
-               rc = emulate_nm(ctxt);
-               goto done;
-       }
+       if (unlikely(ctxt->d &
+                    (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
+               if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
+                               (ctxt->d & Undefined)) {
+                       rc = emulate_ud(ctxt);
+                       goto done;
+               }
 
-       if (ctxt->d & Mmx) {
-               rc = flush_pending_x87_faults(ctxt);
-               if (rc != X86EMUL_CONTINUE)
+               if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
+                   || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
+                       rc = emulate_ud(ctxt);
                        goto done;
-               /*
-                * Now that we know the fpu is exception safe, we can fetch
-                * operands from it.
-                */
-               fetch_possible_mmx_operand(ctxt, &ctxt->src);
-               fetch_possible_mmx_operand(ctxt, &ctxt->src2);
-               if (!(ctxt->d & Mov))
-                       fetch_possible_mmx_operand(ctxt, &ctxt->dst);
-       }
+               }
 
-       if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
-               rc = emulator_check_intercept(ctxt, ctxt->intercept,
-                                             X86_ICPT_PRE_EXCEPT);
-               if (rc != X86EMUL_CONTINUE)
+               if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
+                       rc = emulate_nm(ctxt);
                        goto done;
-       }
+               }
 
-       /* Privileged instruction can be executed only in CPL=0 */
-       if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
-               rc = emulate_gp(ctxt, 0);
-               goto done;
-       }
+               if (ctxt->d & Mmx) {
+                       rc = flush_pending_x87_faults(ctxt);
+                       if (rc != X86EMUL_CONTINUE)
+                               goto done;
+                       /*
+                        * Now that we know the fpu is exception safe, we can fetch
+                        * operands from it.
+                        */
+                       fetch_possible_mmx_operand(ctxt, &ctxt->src);
+                       fetch_possible_mmx_operand(ctxt, &ctxt->src2);
+                       if (!(ctxt->d & Mov))
+                               fetch_possible_mmx_operand(ctxt, &ctxt->dst);
+               }
 
-       /* Instruction can only be executed in protected mode */
-       if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
-               rc = emulate_ud(ctxt);
-               goto done;
-       }
+               if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
+                       rc = emulator_check_intercept(ctxt, ctxt->intercept,
+                                                     X86_ICPT_PRE_EXCEPT);
+                       if (rc != X86EMUL_CONTINUE)
+                               goto done;
+               }
 
-       /* Do instruction specific permission checks */
-       if (ctxt->check_perm) {
-               rc = ctxt->check_perm(ctxt);
-               if (rc != X86EMUL_CONTINUE)
+               /* Privileged instruction can be executed only in CPL=0 */
+               if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
+                       rc = emulate_gp(ctxt, 0);
                        goto done;
-       }
+               }
 
-       if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
-               rc = emulator_check_intercept(ctxt, ctxt->intercept,
-                                             X86_ICPT_POST_EXCEPT);
-               if (rc != X86EMUL_CONTINUE)
+               /* Instruction can only be executed in protected mode */
+               if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
+                       rc = emulate_ud(ctxt);
                        goto done;
-       }
+               }
 
-       if (ctxt->rep_prefix && (ctxt->d & String)) {
-               /* All REP prefixes have the same first termination condition */
-               if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
-                       ctxt->eip = ctxt->_eip;
-                       goto done;
+               /* Do instruction specific permission checks */
+               if (ctxt->check_perm) {
+                       rc = ctxt->check_perm(ctxt);
+                       if (rc != X86EMUL_CONTINUE)
+                               goto done;
+               }
+
+               if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
+                       rc = emulator_check_intercept(ctxt, ctxt->intercept,
+                                                     X86_ICPT_POST_EXCEPT);
+                       if (rc != X86EMUL_CONTINUE)
+                               goto done;
+               }
+
+               if (ctxt->rep_prefix && (ctxt->d & String)) {
+                       /* All REP prefixes have the same first termination condition */
+                       if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
+                               ctxt->eip = ctxt->_eip;
+                               goto done;
+                       }
                }
        }
 
@@ -4657,8 +4699,9 @@ special_insn:
                break;
        case 0x90 ... 0x97: /* nop / xchg reg, rax */
                if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
-                       break;
-               rc = em_xchg(ctxt);
+                       ctxt->dst.type = OP_NONE;
+               else
+                       rc = em_xchg(ctxt);
                break;
        case 0x98: /* cbw/cwde/cdqe */
                switch (ctxt->op_bytes) {
@@ -4709,17 +4752,17 @@ special_insn:
                goto done;
 
 writeback:
-       if (!(ctxt->d & NoWrite)) {
-               rc = writeback(ctxt, &ctxt->dst);
-               if (rc != X86EMUL_CONTINUE)
-                       goto done;
-       }
        if (ctxt->d & SrcWrite) {
                BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
                rc = writeback(ctxt, &ctxt->src);
                if (rc != X86EMUL_CONTINUE)
                        goto done;
        }
+       if (!(ctxt->d & NoWrite)) {
+               rc = writeback(ctxt, &ctxt->dst);
+               if (rc != X86EMUL_CONTINUE)
+                       goto done;
+       }
 
        /*
         * restore dst type in case the decoding will be reused
@@ -4793,8 +4836,10 @@ twobyte_insn:
                ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
                break;
        case 0x40 ... 0x4f:     /* cmov */
-               ctxt->dst.val = ctxt->dst.orig_val = ctxt->src.val;
-               if (!test_cc(ctxt->b, ctxt->eflags))
+               if (test_cc(ctxt->b, ctxt->eflags))
+                       ctxt->dst.val = ctxt->src.val;
+               else if (ctxt->mode != X86EMUL_MODE_PROT64 ||
+                        ctxt->op_bytes != 4)
                        ctxt->dst.type = OP_NONE; /* no writeback */
                break;
        case 0x80 ... 0x8f: /* jnz rel, etc*/
@@ -4818,8 +4863,8 @@ twobyte_insn:
                break;
        case 0xc3:              /* movnti */
                ctxt->dst.bytes = ctxt->op_bytes;
-               ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val :
-                                                       (u64) ctxt->src.val;
+               ctxt->dst.val = (ctxt->op_bytes == 8) ? (u64) ctxt->src.val :
+                                                       (u32) ctxt->src.val;
                break;
        default:
                goto cannot_emulate;