KVM: SVM: always update CR3 in VMCB
arch/x86/kvm/svm/svm.c
1 #define pr_fmt(fmt) "SVM: " fmt
2
3 #include <linux/kvm_host.h>
4
5 #include "irq.h"
6 #include "mmu.h"
7 #include "kvm_cache_regs.h"
8 #include "x86.h"
9 #include "cpuid.h"
10 #include "pmu.h"
11
12 #include <linux/module.h>
13 #include <linux/mod_devicetable.h>
14 #include <linux/kernel.h>
15 #include <linux/vmalloc.h>
16 #include <linux/highmem.h>
17 #include <linux/amd-iommu.h>
18 #include <linux/sched.h>
19 #include <linux/trace_events.h>
20 #include <linux/slab.h>
21 #include <linux/hashtable.h>
22 #include <linux/frame.h>
23 #include <linux/psp-sev.h>
24 #include <linux/file.h>
25 #include <linux/pagemap.h>
26 #include <linux/swap.h>
27 #include <linux/rwsem.h>
28
29 #include <asm/apic.h>
30 #include <asm/perf_event.h>
31 #include <asm/tlbflush.h>
32 #include <asm/desc.h>
33 #include <asm/debugreg.h>
34 #include <asm/kvm_para.h>
35 #include <asm/irq_remapping.h>
36 #include <asm/mce.h>
37 #include <asm/spec-ctrl.h>
38 #include <asm/cpu_device_id.h>
39
40 #include <asm/virtext.h>
41 #include "trace.h"
42
43 #include "svm.h"
44
45 #define __ex(x) __kvm_handle_fault_on_reboot(x)
46
47 MODULE_AUTHOR("Qumranet");
48 MODULE_LICENSE("GPL");
49
50 #ifdef MODULE
51 static const struct x86_cpu_id svm_cpu_id[] = {
52         X86_MATCH_FEATURE(X86_FEATURE_SVM, NULL),
53         {}
54 };
55 MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
56 #endif
57
58 #define IOPM_ALLOC_ORDER 2
59 #define MSRPM_ALLOC_ORDER 1
60
61 #define SEG_TYPE_LDT 2
62 #define SEG_TYPE_BUSY_TSS16 3
63
64 #define SVM_FEATURE_LBRV           (1 <<  1)
65 #define SVM_FEATURE_SVML           (1 <<  2)
66 #define SVM_FEATURE_TSC_RATE       (1 <<  4)
67 #define SVM_FEATURE_VMCB_CLEAN     (1 <<  5)
68 #define SVM_FEATURE_FLUSH_ASID     (1 <<  6)
69 #define SVM_FEATURE_DECODE_ASSIST  (1 <<  7)
70 #define SVM_FEATURE_PAUSE_FILTER   (1 << 10)
71
72 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
73
74 #define TSC_RATIO_RSVD          0xffffff0000000000ULL
75 #define TSC_RATIO_MIN           0x0000000000000001ULL
76 #define TSC_RATIO_MAX           0x000000ffffffffffULL
77
78 static bool erratum_383_found __read_mostly;
79
80 u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
81
82 /*
83  * Set osvw_len to a higher value when updated Revision Guides
84  * are published and we know what the new status bits are.
85  */
86 static uint64_t osvw_len = 4, osvw_status;
87
88 static DEFINE_PER_CPU(u64, current_tsc_ratio);
89 #define TSC_RATIO_DEFAULT       0x0100000000ULL
90
91 static const struct svm_direct_access_msrs {
92         u32 index;   /* Index of the MSR */
93         bool always; /* True if intercept is always on */
94 } direct_access_msrs[] = {
95         { .index = MSR_STAR,                            .always = true  },
96         { .index = MSR_IA32_SYSENTER_CS,                .always = true  },
97 #ifdef CONFIG_X86_64
98         { .index = MSR_GS_BASE,                         .always = true  },
99         { .index = MSR_FS_BASE,                         .always = true  },
100         { .index = MSR_KERNEL_GS_BASE,                  .always = true  },
101         { .index = MSR_LSTAR,                           .always = true  },
102         { .index = MSR_CSTAR,                           .always = true  },
103         { .index = MSR_SYSCALL_MASK,                    .always = true  },
104 #endif
105         { .index = MSR_IA32_SPEC_CTRL,                  .always = false },
106         { .index = MSR_IA32_PRED_CMD,                   .always = false },
107         { .index = MSR_IA32_LASTBRANCHFROMIP,           .always = false },
108         { .index = MSR_IA32_LASTBRANCHTOIP,             .always = false },
109         { .index = MSR_IA32_LASTINTFROMIP,              .always = false },
110         { .index = MSR_IA32_LASTINTTOIP,                .always = false },
111         { .index = MSR_INVALID,                         .always = false },
112 };
113
114 /* enable NPT for AMD64 and X86 with PAE */
115 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
116 bool npt_enabled = true;
117 #else
118 bool npt_enabled;
119 #endif
120
121 /*
122  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
123  * pause_filter_count: On processors that support Pause filtering (indicated
124  *      by CPUID Fn8000_000A_EDX), the VMCB provides a 16 bit pause filter
125  *      count value. On VMRUN this value is loaded into an internal counter.
126  *      Each time a pause instruction is executed, this counter is decremented
127  *      until it reaches zero at which time a #VMEXIT is generated if pause
128  *      intercept is enabled. Refer to AMD APM Vol 2 Section 15.14.4 Pause
129  *      Intercept Filtering for more details.
130  *      This also indicates whether the PLE logic is enabled.
131  *
132  * pause_filter_thresh: In addition, some processor families support advanced
133  *      pause filtering (indicated by CPUID Fn8000_000A_EDX), which places an
134  *      upper bound on the amount of time a guest may execute in a pause loop.
135  *      In this mode, a 16-bit pause filter threshold field is added in the
136  *      VMCB. The threshold value is a cycle count that is used to reset the
137  *      pause counter. As with simple pause filtering, VMRUN loads the pause
138  *      count value from VMCB into an internal counter. Then, on each pause
139  *      instruction the hardware checks the elapsed number of cycles since
140  *      the most recent pause instruction against the pause filter threshold.
141  *      If the elapsed cycle count is greater than the pause filter threshold,
142  *      then the internal pause count is reloaded from the VMCB and execution
143  *      continues. If the elapsed cycle count is less than the pause filter
144  *      threshold, then the internal pause count is decremented. If the count
145  *      value is less than zero and PAUSE intercept is enabled, a #VMEXIT is
146  *      triggered. If advanced pause filtering is supported and pause filter
147  *      threshold field is set to zero, the filter will operate in the simpler,
148  *      count only mode.
149  */
150
151 static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP;
152 module_param(pause_filter_thresh, ushort, 0444);
153
154 static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW;
155 module_param(pause_filter_count, ushort, 0444);
156
157 /* Default doubles per-vcpu window every exit. */
158 static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
159 module_param(pause_filter_count_grow, ushort, 0444);
160
161 /* Default resets per-vcpu window every exit to pause_filter_count. */
162 static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
163 module_param(pause_filter_count_shrink, ushort, 0444);
164
165 /* Default is to compute the maximum so we can never overflow. */
166 static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
167 module_param(pause_filter_count_max, ushort, 0444);
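
/*
 * Illustrative sketch (the starting value of 3000 is assumed, not taken from
 * this file): with the default grow factor the window doubles on each exit,
 * e.g. 3000 -> 6000 -> 12000, capped at pause_filter_count_max; with the
 * default shrink factor of 0 it is reset back to pause_filter_count.
 */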
168
169 /* allow nested paging (virtualized MMU) for all guests */
170 static int npt = true;
171 module_param(npt, int, S_IRUGO);
172
173 /* allow nested virtualization in KVM/SVM */
174 static int nested = true;
175 module_param(nested, int, S_IRUGO);
176
177 /* enable/disable Next RIP Save */
178 static int nrips = true;
179 module_param(nrips, int, 0444);
180
181 /* enable/disable Virtual VMLOAD VMSAVE */
182 static int vls = true;
183 module_param(vls, int, 0444);
184
185 /* enable/disable Virtual GIF */
186 static int vgif = true;
187 module_param(vgif, int, 0444);
188
189 /* enable/disable SEV support */
190 static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
191 module_param(sev, int, 0444);
192
193 static bool __read_mostly dump_invalid_vmcb = false;
194 module_param(dump_invalid_vmcb, bool, 0644);
195
196 static u8 rsm_ins_bytes[] = "\x0f\xaa";
197
198 static void svm_complete_interrupts(struct vcpu_svm *svm);
199
200 static unsigned long iopm_base;
201
202 struct kvm_ldttss_desc {
203         u16 limit0;
204         u16 base0;
205         unsigned base1:8, type:5, dpl:2, p:1;
206         unsigned limit1:4, zero0:3, g:1, base2:8;
207         u32 base3;
208         u32 zero1;
209 } __attribute__((packed));
210
211 DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
212
213 static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
214
215 #define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
216 #define MSRS_RANGE_SIZE 2048
217 #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
218
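/*
 * Worked example (illustrative, not from the original code): the permission
 * map covers three 2048-byte ranges (MSRs 0x00000000-0x00001fff,
 * 0xc0000000-0xc0001fff and 0xc0010000-0xc0011fff) with two intercept bits
 * per MSR, i.e. four MSRs per byte.  For MSR_STAR (0xc0000081):
 *
 *   byte offset = (0xc0000081 - 0xc0000000) / 4 + 1 * 2048 = 32 + 2048 = 2080
 *   u32 offset  = 2080 / 4 = 520
 *
 * so its intercept bits live in word 520 of the map; see
 * set_msr_interception() for the bit positions inside that word.
 */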
219 u32 svm_msrpm_offset(u32 msr)
220 {
221         u32 offset;
222         int i;
223
224         for (i = 0; i < NUM_MSR_MAPS; i++) {
225                 if (msr < msrpm_ranges[i] ||
226                     msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
227                         continue;
228
229                 offset  = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */
230                 offset += (i * MSRS_RANGE_SIZE);       /* add range offset */
231
232                 /* Now we have the u8 offset - but need the u32 offset */
233                 return offset / 4;
234         }
235
236         /* MSR not in any range */
237         return MSR_INVALID;
238 }
239
240 #define MAX_INST_SIZE 15
241
242 static inline void clgi(void)
243 {
244         asm volatile (__ex("clgi"));
245 }
246
247 static inline void stgi(void)
248 {
249         asm volatile (__ex("stgi"));
250 }
251
252 static inline void invlpga(unsigned long addr, u32 asid)
253 {
254         asm volatile (__ex("invlpga %1, %0") : : "c"(asid), "a"(addr));
255 }
256
257 static int get_npt_level(struct kvm_vcpu *vcpu)
258 {
259 #ifdef CONFIG_X86_64
260         return PT64_ROOT_4LEVEL;
261 #else
262         return PT32E_ROOT_LEVEL;
263 #endif
264 }
265
266 void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
267 {
268         vcpu->arch.efer = efer;
269
270         if (!npt_enabled) {
271                 /* Shadow paging assumes NX to be available.  */
272                 efer |= EFER_NX;
273
274                 if (!(efer & EFER_LMA))
275                         efer &= ~EFER_LME;
276         }
277
278         to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
279         mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
280 }
281
282 static int is_external_interrupt(u32 info)
283 {
284         info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
285         return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
286 }
287
288 static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
289 {
290         struct vcpu_svm *svm = to_svm(vcpu);
291         u32 ret = 0;
292
293         if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
294                 ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
295         return ret;
296 }
297
298 static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
299 {
300         struct vcpu_svm *svm = to_svm(vcpu);
301
302         if (mask == 0)
303                 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
304         else
305                 svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
306
307 }
308
309 static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
310 {
311         struct vcpu_svm *svm = to_svm(vcpu);
312
313         if (nrips && svm->vmcb->control.next_rip != 0) {
314                 WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
315                 svm->next_rip = svm->vmcb->control.next_rip;
316         }
317
318         if (!svm->next_rip) {
319                 if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
320                         return 0;
321         } else {
322                 kvm_rip_write(vcpu, svm->next_rip);
323         }
324         svm_set_interrupt_shadow(vcpu, 0);
325
326         return 1;
327 }
328
329 static void svm_queue_exception(struct kvm_vcpu *vcpu)
330 {
331         struct vcpu_svm *svm = to_svm(vcpu);
332         unsigned nr = vcpu->arch.exception.nr;
333         bool has_error_code = vcpu->arch.exception.has_error_code;
334         u32 error_code = vcpu->arch.exception.error_code;
335
336         kvm_deliver_exception_payload(&svm->vcpu);
337
338         if (nr == BP_VECTOR && !nrips) {
339                 unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
340
341                 /*
342                  * For guest debugging where we have to reinject #BP if some
343                  * INT3 is guest-owned:
344                  * Emulate nRIP by moving RIP forward. Will fail if injection
345                  * raises a fault that is not intercepted. Still better than
346                  * failing in all cases.
347                  */
348                 (void)skip_emulated_instruction(&svm->vcpu);
349                 rip = kvm_rip_read(&svm->vcpu);
350                 svm->int3_rip = rip + svm->vmcb->save.cs.base;
351                 svm->int3_injected = rip - old_rip;
352         }
353
354         svm->vmcb->control.event_inj = nr
355                 | SVM_EVTINJ_VALID
356                 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
357                 | SVM_EVTINJ_TYPE_EXEPT;
358         svm->vmcb->control.event_inj_err = error_code;
359 }
360
361 static void svm_init_erratum_383(void)
362 {
363         u32 low, high;
364         int err;
365         u64 val;
366
367         if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH))
368                 return;
369
370         /* Use _safe variants to not break nested virtualization */
371         val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
372         if (err)
373                 return;
374
375         val |= (1ULL << 47);
376
377         low  = lower_32_bits(val);
378         high = upper_32_bits(val);
379
380         native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
381
382         erratum_383_found = true;
383 }
384
385 static void svm_init_osvw(struct kvm_vcpu *vcpu)
386 {
387         /*
388          * Guests should see errata 400 and 415 as fixed (assuming that
389          * HLT and IO instructions are intercepted).
390          */
391         vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
392         vcpu->arch.osvw.status = osvw_status & ~(6ULL);
393
394         /*
395          * By increasing VCPU's osvw.length to 3 we are telling the guest that
396          * all osvw.status bits inside that length, including bit 0 (which is
397          * reserved for erratum 298), are valid. However, if host processor's
398          * osvw_len is 0 then osvw_status[0] carries no information. We need to
399          * be conservative here and therefore we tell the guest that erratum 298
400          * is present (because we really don't know).
401          */
402         if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
403                 vcpu->arch.osvw.status |= 1;
404 }
405
406 static int has_svm(void)
407 {
408         const char *msg;
409
410         if (!cpu_has_svm(&msg)) {
411                 printk(KERN_INFO "has_svm: %s\n", msg);
412                 return 0;
413         }
414
415         return 1;
416 }
417
418 static void svm_hardware_disable(void)
419 {
420         /* Make sure we clean up behind us */
421         if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
422                 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
423
424         cpu_svm_disable();
425
426         amd_pmu_disable_virt();
427 }
428
429 static int svm_hardware_enable(void)
430 {
431
432         struct svm_cpu_data *sd;
433         uint64_t efer;
434         struct desc_struct *gdt;
435         int me = raw_smp_processor_id();
436
437         rdmsrl(MSR_EFER, efer);
438         if (efer & EFER_SVME)
439                 return -EBUSY;
440
441         if (!has_svm()) {
442                 pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
443                 return -EINVAL;
444         }
445         sd = per_cpu(svm_data, me);
446         if (!sd) {
447                 pr_err("%s: svm_data is NULL on %d\n", __func__, me);
448                 return -EINVAL;
449         }
450
451         sd->asid_generation = 1;
452         sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
453         sd->next_asid = sd->max_asid + 1;
454         sd->min_asid = max_sev_asid + 1;
455
456         gdt = get_current_gdt_rw();
457         sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
458
459         wrmsrl(MSR_EFER, efer | EFER_SVME);
460
461         wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
462
463         if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
464                 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
465                 __this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT);
466         }
467
468
469         /*
470          * Get OSVW bits.
471          *
472          * Note that it is possible to have a system with mixed processor
473          * revisions and therefore different OSVW bits. If bits are not the same
474          * on different processors then choose the worst case (i.e. if erratum
475          * is present on one processor and not on another then assume that the
476          * erratum is present everywhere).
477          */
478         if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
479                 uint64_t len, status = 0;
480                 int err;
481
482                 len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
483                 if (!err)
484                         status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
485                                                       &err);
486
487                 if (err)
488                         osvw_status = osvw_len = 0;
489                 else {
490                         if (len < osvw_len)
491                                 osvw_len = len;
492                         osvw_status |= status;
493                         osvw_status &= (1ULL << osvw_len) - 1;
494                 }
495         } else
496                 osvw_status = osvw_len = 0;
497
498         svm_init_erratum_383();
499
500         amd_pmu_enable_virt();
501
502         return 0;
503 }
504
505 static void svm_cpu_uninit(int cpu)
506 {
507         struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
508
509         if (!sd)
510                 return;
511
512         per_cpu(svm_data, cpu) = NULL;
513         kfree(sd->sev_vmcbs);
514         __free_page(sd->save_area);
515         kfree(sd);
516 }
517
518 static int svm_cpu_init(int cpu)
519 {
520         struct svm_cpu_data *sd;
521
522         sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
523         if (!sd)
524                 return -ENOMEM;
525         sd->cpu = cpu;
526         sd->save_area = alloc_page(GFP_KERNEL);
527         if (!sd->save_area)
528                 goto free_cpu_data;
529
530         if (svm_sev_enabled()) {
531                 sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1,
532                                               sizeof(void *),
533                                               GFP_KERNEL);
534                 if (!sd->sev_vmcbs)
535                         goto free_save_area;
536         }
537
538         per_cpu(svm_data, cpu) = sd;
539
540         return 0;
541
542 free_save_area:
543         __free_page(sd->save_area);
544 free_cpu_data:
545         kfree(sd);
546         return -ENOMEM;
547
548 }
549
550 static bool valid_msr_intercept(u32 index)
551 {
552         int i;
553
554         for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
555                 if (direct_access_msrs[i].index == index)
556                         return true;
557
558         return false;
559 }
560
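/*
 * Return true if writes to @msr are currently intercepted, consulting the
 * nested MSR permission map while running a nested guest and L1's own map
 * otherwise.
 */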
561 static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
562 {
563         u8 bit_write;
564         unsigned long tmp;
565         u32 offset;
566         u32 *msrpm;
567
568         msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm :
569                                       to_svm(vcpu)->msrpm;
570
571         offset    = svm_msrpm_offset(msr);
572         bit_write = 2 * (msr & 0x0f) + 1;
573         tmp       = msrpm[offset];
574
575         BUG_ON(offset == MSR_INVALID);
576
577         return !!test_bit(bit_write,  &tmp);
578 }
579
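/*
 * Each MSR owns two consecutive bits in the permission map word returned by
 * svm_msrpm_offset(): bit 2 * (msr & 0x0f) controls read intercepts and the
 * following bit controls write intercepts.  A clear bit means the guest may
 * access the MSR directly; a set bit forces a #VMEXIT.  For MSR_STAR
 * (0xc0000081), for example, these are bits 2 and 3 of word 520.
 */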
580 static void set_msr_interception(u32 *msrpm, unsigned msr,
581                                  int read, int write)
582 {
583         u8 bit_read, bit_write;
584         unsigned long tmp;
585         u32 offset;
586
587         /*
588          * If this warning triggers extend the direct_access_msrs list at the
589          * beginning of the file
590          */
591         WARN_ON(!valid_msr_intercept(msr));
592
593         offset    = svm_msrpm_offset(msr);
594         bit_read  = 2 * (msr & 0x0f);
595         bit_write = 2 * (msr & 0x0f) + 1;
596         tmp       = msrpm[offset];
597
598         BUG_ON(offset == MSR_INVALID);
599
600         read  ? clear_bit(bit_read,  &tmp) : set_bit(bit_read,  &tmp);
601         write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
602
603         msrpm[offset] = tmp;
604 }
605
606 static void svm_vcpu_init_msrpm(u32 *msrpm)
607 {
608         int i;
609
610         memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
611
612         for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
613                 if (!direct_access_msrs[i].always)
614                         continue;
615
616                 set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
617         }
618 }
619
620 static void add_msr_offset(u32 offset)
621 {
622         int i;
623
624         for (i = 0; i < MSRPM_OFFSETS; ++i) {
625
626                 /* Offset already in list? */
627                 if (msrpm_offsets[i] == offset)
628                         return;
629
630                 /* Slot used by another offset? */
631                 if (msrpm_offsets[i] != MSR_INVALID)
632                         continue;
633
634                 /* Add offset to list */
635                 msrpm_offsets[i] = offset;
636
637                 return;
638         }
639
640         /*
641          * If this BUG triggers the msrpm_offsets table has an overflow. Just
642          * increase MSRPM_OFFSETS in this case.
643          */
644         BUG();
645 }
646
647 static void init_msrpm_offsets(void)
648 {
649         int i;
650
651         memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
652
653         for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
654                 u32 offset;
655
656                 offset = svm_msrpm_offset(direct_access_msrs[i].index);
657                 BUG_ON(offset == MSR_INVALID);
658
659                 add_msr_offset(offset);
660         }
661 }
662
663 static void svm_enable_lbrv(struct vcpu_svm *svm)
664 {
665         u32 *msrpm = svm->msrpm;
666
667         svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
668         set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
669         set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
670         set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
671         set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
672 }
673
674 static void svm_disable_lbrv(struct vcpu_svm *svm)
675 {
676         u32 *msrpm = svm->msrpm;
677
678         svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
679         set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
680         set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
681         set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
682         set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
683 }
684
685 void disable_nmi_singlestep(struct vcpu_svm *svm)
686 {
687         svm->nmi_singlestep = false;
688
689         if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
690                 /* Clear our flags if they were not set by the guest */
691                 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
692                         svm->vmcb->save.rflags &= ~X86_EFLAGS_TF;
693                 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
694                         svm->vmcb->save.rflags &= ~X86_EFLAGS_RF;
695         }
696 }
697
698 static void grow_ple_window(struct kvm_vcpu *vcpu)
699 {
700         struct vcpu_svm *svm = to_svm(vcpu);
701         struct vmcb_control_area *control = &svm->vmcb->control;
702         int old = control->pause_filter_count;
703
704         control->pause_filter_count = __grow_ple_window(old,
705                                                         pause_filter_count,
706                                                         pause_filter_count_grow,
707                                                         pause_filter_count_max);
708
709         if (control->pause_filter_count != old) {
710                 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
711                 trace_kvm_ple_window_update(vcpu->vcpu_id,
712                                             control->pause_filter_count, old);
713         }
714 }
715
716 static void shrink_ple_window(struct kvm_vcpu *vcpu)
717 {
718         struct vcpu_svm *svm = to_svm(vcpu);
719         struct vmcb_control_area *control = &svm->vmcb->control;
720         int old = control->pause_filter_count;
721
722         control->pause_filter_count =
723                                 __shrink_ple_window(old,
724                                                     pause_filter_count,
725                                                     pause_filter_count_shrink,
726                                                     pause_filter_count);
727         if (control->pause_filter_count != old) {
728                 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
729                 trace_kvm_ple_window_update(vcpu->vcpu_id,
730                                             control->pause_filter_count, old);
731         }
732 }
733
734 /*
735  * The default MMIO mask is a single bit (excluding the present bit),
736  * which could conflict with the memory encryption bit. Check for
737  * memory encryption support and override the default MMIO mask if
738  * memory encryption is enabled.
739  */
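/*
 * Illustrative example (the bit positions are assumed, not taken from this
 * file): with the C-bit reported at position 47 and 48 physical address
 * bits, enc_bit (47) and mask_bit (48) differ, so mask_bit stays at 48 and
 * the resulting MMIO mask is rsvd_bits(48, 51) | PT_PRESENT_MASK.
 */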
740 static __init void svm_adjust_mmio_mask(void)
741 {
742         unsigned int enc_bit, mask_bit;
743         u64 msr, mask;
744
745         /* If there is no memory encryption support, use existing mask */
746         if (cpuid_eax(0x80000000) < 0x8000001f)
747                 return;
748
749         /* If memory encryption is not enabled, use existing mask */
750         rdmsrl(MSR_K8_SYSCFG, msr);
751         if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
752                 return;
753
754         enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
755         mask_bit = boot_cpu_data.x86_phys_bits;
756
757         /* Increment the mask bit if it is the same as the encryption bit */
758         if (enc_bit == mask_bit)
759                 mask_bit++;
760
761         /*
762          * If the mask bit location is below 52, then some bits above the
763          * physical addressing limit will always be reserved, so use the
764          * rsvd_bits() function to generate the mask. This mask, along with
765          * the present bit, will be used to generate a page fault with
766          * PFER.RSV = 1.
767          *
768          * If the mask bit location is 52 (or above), then clear the mask.
769          */
770         mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;
771
772         kvm_mmu_set_mmio_spte_mask(mask, PT_WRITABLE_MASK | PT_USER_MASK);
773 }
774
775 static void svm_hardware_teardown(void)
776 {
777         int cpu;
778
779         if (svm_sev_enabled())
780                 sev_hardware_teardown();
781
782         for_each_possible_cpu(cpu)
783                 svm_cpu_uninit(cpu);
784
785         __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
786         iopm_base = 0;
787 }
788
789 static __init void svm_set_cpu_caps(void)
790 {
791         kvm_set_cpu_caps();
792
793         supported_xss = 0;
794
795         /* CPUID 0x80000001 and 0x8000000A (SVM features) */
796         if (nested) {
797                 kvm_cpu_cap_set(X86_FEATURE_SVM);
798
799                 if (nrips)
800                         kvm_cpu_cap_set(X86_FEATURE_NRIPS);
801
802                 if (npt_enabled)
803                         kvm_cpu_cap_set(X86_FEATURE_NPT);
804         }
805
806         /* CPUID 0x80000008 */
807         if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
808             boot_cpu_has(X86_FEATURE_AMD_SSBD))
809                 kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
810 }
811
812 static __init int svm_hardware_setup(void)
813 {
814         int cpu;
815         struct page *iopm_pages;
816         void *iopm_va;
817         int r;
818
819         iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
820
821         if (!iopm_pages)
822                 return -ENOMEM;
823
824         iopm_va = page_address(iopm_pages);
825         memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
826         iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
827
828         init_msrpm_offsets();
829
830         supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
831
832         if (boot_cpu_has(X86_FEATURE_NX))
833                 kvm_enable_efer_bits(EFER_NX);
834
835         if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
836                 kvm_enable_efer_bits(EFER_FFXSR);
837
838         if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
839                 kvm_has_tsc_control = true;
840                 kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
841                 kvm_tsc_scaling_ratio_frac_bits = 32;
842         }
843
844         /* Check for pause filtering support */
845         if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
846                 pause_filter_count = 0;
847                 pause_filter_thresh = 0;
848         } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
849                 pause_filter_thresh = 0;
850         }
851
852         if (nested) {
853                 printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
854                 kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
855         }
856
857         if (sev) {
858                 if (boot_cpu_has(X86_FEATURE_SEV) &&
859                     IS_ENABLED(CONFIG_KVM_AMD_SEV)) {
860                         r = sev_hardware_setup();
861                         if (r)
862                                 sev = false;
863                 } else {
864                         sev = false;
865                 }
866         }
867
868         svm_adjust_mmio_mask();
869
870         for_each_possible_cpu(cpu) {
871                 r = svm_cpu_init(cpu);
872                 if (r)
873                         goto err;
874         }
875
876         if (!boot_cpu_has(X86_FEATURE_NPT))
877                 npt_enabled = false;
878
879         if (npt_enabled && !npt)
880                 npt_enabled = false;
881
882         kvm_configure_mmu(npt_enabled, PG_LEVEL_1G);
883         pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
884
885         if (nrips) {
886                 if (!boot_cpu_has(X86_FEATURE_NRIPS))
887                         nrips = false;
888         }
889
890         if (avic) {
891                 if (!npt_enabled ||
892                     !boot_cpu_has(X86_FEATURE_AVIC) ||
893                     !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) {
894                         avic = false;
895                 } else {
896                         pr_info("AVIC enabled\n");
897
898                         amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
899                 }
900         }
901
902         if (vls) {
903                 if (!npt_enabled ||
904                     !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||
905                     !IS_ENABLED(CONFIG_X86_64)) {
906                         vls = false;
907                 } else {
908                         pr_info("Virtual VMLOAD VMSAVE supported\n");
909                 }
910         }
911
912         if (vgif) {
913                 if (!boot_cpu_has(X86_FEATURE_VGIF))
914                         vgif = false;
915                 else
916                         pr_info("Virtual GIF supported\n");
917         }
918
919         svm_set_cpu_caps();
920
921         return 0;
922
923 err:
924         svm_hardware_teardown();
925         return r;
926 }
927
928 static void init_seg(struct vmcb_seg *seg)
929 {
930         seg->selector = 0;
931         seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
932                       SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */
933         seg->limit = 0xffff;
934         seg->base = 0;
935 }
936
937 static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
938 {
939         seg->selector = 0;
940         seg->attrib = SVM_SELECTOR_P_MASK | type;
941         seg->limit = 0xffff;
942         seg->base = 0;
943 }
944
945 static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
946 {
947         struct vcpu_svm *svm = to_svm(vcpu);
948         u64 g_tsc_offset = 0;
949
950         if (is_guest_mode(vcpu)) {
951                 /* Write L1's TSC offset.  */
952                 g_tsc_offset = svm->vmcb->control.tsc_offset -
953                                svm->nested.hsave->control.tsc_offset;
954                 svm->nested.hsave->control.tsc_offset = offset;
955         }
956
957         trace_kvm_write_tsc_offset(vcpu->vcpu_id,
958                                    svm->vmcb->control.tsc_offset - g_tsc_offset,
959                                    offset);
960
961         svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
962
963         mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
964         return svm->vmcb->control.tsc_offset;
965 }
966
967 static void init_vmcb(struct vcpu_svm *svm)
968 {
969         struct vmcb_control_area *control = &svm->vmcb->control;
970         struct vmcb_save_area *save = &svm->vmcb->save;
971
972         svm->vcpu.arch.hflags = 0;
973
974         set_cr_intercept(svm, INTERCEPT_CR0_READ);
975         set_cr_intercept(svm, INTERCEPT_CR3_READ);
976         set_cr_intercept(svm, INTERCEPT_CR4_READ);
977         set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
978         set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
979         set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
980         if (!kvm_vcpu_apicv_active(&svm->vcpu))
981                 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
982
983         set_dr_intercepts(svm);
984
985         set_exception_intercept(svm, PF_VECTOR);
986         set_exception_intercept(svm, UD_VECTOR);
987         set_exception_intercept(svm, MC_VECTOR);
988         set_exception_intercept(svm, AC_VECTOR);
989         set_exception_intercept(svm, DB_VECTOR);
990         /*
991          * Guest access to VMware backdoor ports could legitimately
992          * trigger #GP because of TSS I/O permission bitmap.
993          * We intercept those #GP and allow access to them anyway
994          * as VMware does.
995          */
996         if (enable_vmware_backdoor)
997                 set_exception_intercept(svm, GP_VECTOR);
998
999         set_intercept(svm, INTERCEPT_INTR);
1000         set_intercept(svm, INTERCEPT_NMI);
1001         set_intercept(svm, INTERCEPT_SMI);
1002         set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
1003         set_intercept(svm, INTERCEPT_RDPMC);
1004         set_intercept(svm, INTERCEPT_CPUID);
1005         set_intercept(svm, INTERCEPT_INVD);
1006         set_intercept(svm, INTERCEPT_INVLPG);
1007         set_intercept(svm, INTERCEPT_INVLPGA);
1008         set_intercept(svm, INTERCEPT_IOIO_PROT);
1009         set_intercept(svm, INTERCEPT_MSR_PROT);
1010         set_intercept(svm, INTERCEPT_TASK_SWITCH);
1011         set_intercept(svm, INTERCEPT_SHUTDOWN);
1012         set_intercept(svm, INTERCEPT_VMRUN);
1013         set_intercept(svm, INTERCEPT_VMMCALL);
1014         set_intercept(svm, INTERCEPT_VMLOAD);
1015         set_intercept(svm, INTERCEPT_VMSAVE);
1016         set_intercept(svm, INTERCEPT_STGI);
1017         set_intercept(svm, INTERCEPT_CLGI);
1018         set_intercept(svm, INTERCEPT_SKINIT);
1019         set_intercept(svm, INTERCEPT_WBINVD);
1020         set_intercept(svm, INTERCEPT_XSETBV);
1021         set_intercept(svm, INTERCEPT_RDPRU);
1022         set_intercept(svm, INTERCEPT_RSM);
1023
1024         if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
1025                 set_intercept(svm, INTERCEPT_MONITOR);
1026                 set_intercept(svm, INTERCEPT_MWAIT);
1027         }
1028
1029         if (!kvm_hlt_in_guest(svm->vcpu.kvm))
1030                 set_intercept(svm, INTERCEPT_HLT);
1031
1032         control->iopm_base_pa = __sme_set(iopm_base);
1033         control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
1034         control->int_ctl = V_INTR_MASKING_MASK;
1035
1036         init_seg(&save->es);
1037         init_seg(&save->ss);
1038         init_seg(&save->ds);
1039         init_seg(&save->fs);
1040         init_seg(&save->gs);
1041
1042         save->cs.selector = 0xf000;
1043         save->cs.base = 0xffff0000;
1044         /* Executable/Readable Code Segment */
1045         save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
1046                 SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
1047         save->cs.limit = 0xffff;
1048
1049         save->gdtr.limit = 0xffff;
1050         save->idtr.limit = 0xffff;
1051
1052         init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
1053         init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
1054
1055         svm_set_efer(&svm->vcpu, 0);
1056         save->dr6 = 0xffff0ff0;
1057         kvm_set_rflags(&svm->vcpu, 2);
1058         save->rip = 0x0000fff0;
1059         svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
1060
1061         /*
1062          * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
1063          * It also updates the guest-visible cr0 value.
1064          */
1065         svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
1066         kvm_mmu_reset_context(&svm->vcpu);
1067
1068         save->cr4 = X86_CR4_PAE;
1069         /* rdx = ?? */
1070
1071         if (npt_enabled) {
1072                 /* Setup VMCB for Nested Paging */
1073                 control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
1074                 clr_intercept(svm, INTERCEPT_INVLPG);
1075                 clr_exception_intercept(svm, PF_VECTOR);
1076                 clr_cr_intercept(svm, INTERCEPT_CR3_READ);
1077                 clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
1078                 save->g_pat = svm->vcpu.arch.pat;
1079                 save->cr3 = 0;
1080                 save->cr4 = 0;
1081         }
1082         svm->asid_generation = 0;
1083
1084         svm->nested.vmcb = 0;
1085         svm->vcpu.arch.hflags = 0;
1086
1087         if (pause_filter_count) {
1088                 control->pause_filter_count = pause_filter_count;
1089                 if (pause_filter_thresh)
1090                         control->pause_filter_thresh = pause_filter_thresh;
1091                 set_intercept(svm, INTERCEPT_PAUSE);
1092         } else {
1093                 clr_intercept(svm, INTERCEPT_PAUSE);
1094         }
1095
1096         if (kvm_vcpu_apicv_active(&svm->vcpu))
1097                 avic_init_vmcb(svm);
1098
1099         /*
1100          * If hardware supports Virtual VMLOAD VMSAVE then enable it
1101          * in VMCB and clear intercepts to avoid #VMEXIT.
1102          */
1103         if (vls) {
1104                 clr_intercept(svm, INTERCEPT_VMLOAD);
1105                 clr_intercept(svm, INTERCEPT_VMSAVE);
1106                 svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
1107         }
1108
1109         if (vgif) {
1110                 clr_intercept(svm, INTERCEPT_STGI);
1111                 clr_intercept(svm, INTERCEPT_CLGI);
1112                 svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
1113         }
1114
1115         if (sev_guest(svm->vcpu.kvm)) {
1116                 svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
1117                 clr_exception_intercept(svm, UD_VECTOR);
1118         }
1119
1120         mark_all_dirty(svm->vmcb);
1121
1122         enable_gif(svm);
1123
1124 }
1125
1126 static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
1127 {
1128         struct vcpu_svm *svm = to_svm(vcpu);
1129         u32 dummy;
1130         u32 eax = 1;
1131
1132         svm->spec_ctrl = 0;
1133         svm->virt_spec_ctrl = 0;
1134
1135         if (!init_event) {
1136                 svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
1137                                            MSR_IA32_APICBASE_ENABLE;
1138                 if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
1139                         svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
1140         }
1141         init_vmcb(svm);
1142
1143         kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, false);
1144         kvm_rdx_write(vcpu, eax);
1145
1146         if (kvm_vcpu_apicv_active(vcpu) && !init_event)
1147                 avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
1148 }
1149
1150 static int svm_create_vcpu(struct kvm_vcpu *vcpu)
1151 {
1152         struct vcpu_svm *svm;
1153         struct page *page;
1154         struct page *msrpm_pages;
1155         struct page *hsave_page;
1156         struct page *nested_msrpm_pages;
1157         int err;
1158
1159         BUILD_BUG_ON(offsetof(struct vcpu_svm, vcpu) != 0);
1160         svm = to_svm(vcpu);
1161
1162         err = -ENOMEM;
1163         page = alloc_page(GFP_KERNEL_ACCOUNT);
1164         if (!page)
1165                 goto out;
1166
1167         msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
1168         if (!msrpm_pages)
1169                 goto free_page1;
1170
1171         nested_msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
1172         if (!nested_msrpm_pages)
1173                 goto free_page2;
1174
1175         hsave_page = alloc_page(GFP_KERNEL_ACCOUNT);
1176         if (!hsave_page)
1177                 goto free_page3;
1178
1179         err = avic_init_vcpu(svm);
1180         if (err)
1181                 goto free_page4;
1182
1183         /* We initialize this flag to true to make sure that the is_running
1184          * bit would be set the first time the vcpu is loaded.
1185          */
1186         if (irqchip_in_kernel(vcpu->kvm) && kvm_apicv_activated(vcpu->kvm))
1187                 svm->avic_is_running = true;
1188
1189         svm->nested.hsave = page_address(hsave_page);
1190
1191         svm->msrpm = page_address(msrpm_pages);
1192         svm_vcpu_init_msrpm(svm->msrpm);
1193
1194         svm->nested.msrpm = page_address(nested_msrpm_pages);
1195         svm_vcpu_init_msrpm(svm->nested.msrpm);
1196
1197         svm->vmcb = page_address(page);
1198         clear_page(svm->vmcb);
1199         svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT);
1200         svm->asid_generation = 0;
1201         init_vmcb(svm);
1202
1203         svm_init_osvw(vcpu);
1204         vcpu->arch.microcode_version = 0x01000065;
1205
1206         return 0;
1207
1208 free_page4:
1209         __free_page(hsave_page);
1210 free_page3:
1211         __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
1212 free_page2:
1213         __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
1214 free_page1:
1215         __free_page(page);
1216 out:
1217         return err;
1218 }
1219
1220 static void svm_clear_current_vmcb(struct vmcb *vmcb)
1221 {
1222         int i;
1223
1224         for_each_online_cpu(i)
1225                 cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL);
1226 }
1227
1228 static void svm_free_vcpu(struct kvm_vcpu *vcpu)
1229 {
1230         struct vcpu_svm *svm = to_svm(vcpu);
1231
1232         /*
1233          * The vmcb page can be recycled, causing a false negative in
1234          * svm_vcpu_load(). So, ensure that no logical CPU has this
1235          * vmcb page recorded as its current vmcb.
1236          */
1237         svm_clear_current_vmcb(svm->vmcb);
1238
1239         __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
1240         __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
1241         __free_page(virt_to_page(svm->nested.hsave));
1242         __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
1243 }
1244
1245 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1246 {
1247         struct vcpu_svm *svm = to_svm(vcpu);
1248         struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
1249         int i;
1250
1251         if (unlikely(cpu != vcpu->cpu)) {
1252                 svm->asid_generation = 0;
1253                 mark_all_dirty(svm->vmcb);
1254         }
1255
1256 #ifdef CONFIG_X86_64
1257         rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
1258 #endif
1259         savesegment(fs, svm->host.fs);
1260         savesegment(gs, svm->host.gs);
1261         svm->host.ldt = kvm_read_ldt();
1262
1263         for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
1264                 rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
1265
1266         if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
1267                 u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
1268                 if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
1269                         __this_cpu_write(current_tsc_ratio, tsc_ratio);
1270                         wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
1271                 }
1272         }
1273         /* This assumes that the kernel never uses MSR_TSC_AUX */
1274         if (static_cpu_has(X86_FEATURE_RDTSCP))
1275                 wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
1276
1277         if (sd->current_vmcb != svm->vmcb) {
1278                 sd->current_vmcb = svm->vmcb;
1279                 indirect_branch_prediction_barrier();
1280         }
1281         avic_vcpu_load(vcpu, cpu);
1282 }
1283
1284 static void svm_vcpu_put(struct kvm_vcpu *vcpu)
1285 {
1286         struct vcpu_svm *svm = to_svm(vcpu);
1287         int i;
1288
1289         avic_vcpu_put(vcpu);
1290
1291         ++vcpu->stat.host_state_reload;
1292         kvm_load_ldt(svm->host.ldt);
1293 #ifdef CONFIG_X86_64
1294         loadsegment(fs, svm->host.fs);
1295         wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
1296         load_gs_index(svm->host.gs);
1297 #else
1298 #ifdef CONFIG_X86_32_LAZY_GS
1299         loadsegment(gs, svm->host.gs);
1300 #endif
1301 #endif
1302         for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
1303                 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
1304 }
1305
1306 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
1307 {
1308         struct vcpu_svm *svm = to_svm(vcpu);
1309         unsigned long rflags = svm->vmcb->save.rflags;
1310
1311         if (svm->nmi_singlestep) {
1312                 /* Hide our flags if they were not set by the guest */
1313                 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
1314                         rflags &= ~X86_EFLAGS_TF;
1315                 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
1316                         rflags &= ~X86_EFLAGS_RF;
1317         }
1318         return rflags;
1319 }
1320
1321 static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1322 {
1323         if (to_svm(vcpu)->nmi_singlestep)
1324                 rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
1325
1326        /*
1327         * Any change of EFLAGS.VM is accompanied by a reload of SS
1328         * (caused by either a task switch or an inter-privilege IRET),
1329         * so we do not need to update the CPL here.
1330         */
1331         to_svm(vcpu)->vmcb->save.rflags = rflags;
1332 }
1333
1334 static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
1335 {
1336         switch (reg) {
1337         case VCPU_EXREG_PDPTR:
1338                 BUG_ON(!npt_enabled);
1339                 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
1340                 break;
1341         default:
1342                 WARN_ON_ONCE(1);
1343         }
1344 }
1345
1346 static void svm_set_vintr(struct vcpu_svm *svm)
1347 {
1348         struct vmcb_control_area *control;
1349
1350         /* The following fields are ignored when AVIC is enabled */
1351         WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu));
1352         set_intercept(svm, INTERCEPT_VINTR);
1353
1354         /*
1355          * This is just a dummy VINTR to actually cause a vmexit to happen.
1356          * Actual injection of virtual interrupts happens through EVENTINJ.
1357          */
1358         control = &svm->vmcb->control;
1359         control->int_vector = 0x0;
1360         control->int_ctl &= ~V_INTR_PRIO_MASK;
1361         control->int_ctl |= V_IRQ_MASK |
1362                 ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
1363         mark_dirty(svm->vmcb, VMCB_INTR);
1364 }
1365
1366 static void svm_clear_vintr(struct vcpu_svm *svm)
1367 {
1368         clr_intercept(svm, INTERCEPT_VINTR);
1369
1370         svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
1371         mark_dirty(svm->vmcb, VMCB_INTR);
1372 }
1373
1374 static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
1375 {
1376         struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1377
1378         switch (seg) {
1379         case VCPU_SREG_CS: return &save->cs;
1380         case VCPU_SREG_DS: return &save->ds;
1381         case VCPU_SREG_ES: return &save->es;
1382         case VCPU_SREG_FS: return &save->fs;
1383         case VCPU_SREG_GS: return &save->gs;
1384         case VCPU_SREG_SS: return &save->ss;
1385         case VCPU_SREG_TR: return &save->tr;
1386         case VCPU_SREG_LDTR: return &save->ldtr;
1387         }
1388         BUG();
1389         return NULL;
1390 }
1391
1392 static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
1393 {
1394         struct vmcb_seg *s = svm_seg(vcpu, seg);
1395
1396         return s->base;
1397 }
1398
1399 static void svm_get_segment(struct kvm_vcpu *vcpu,
1400                             struct kvm_segment *var, int seg)
1401 {
1402         struct vmcb_seg *s = svm_seg(vcpu, seg);
1403
1404         var->base = s->base;
1405         var->limit = s->limit;
1406         var->selector = s->selector;
1407         var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
1408         var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
1409         var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
1410         var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
1411         var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
1412         var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
1413         var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
1414
1415         /*
1416          * AMD CPUs circa 2014 track the G bit for all segments except CS.
1417          * However, the SVM spec states that the G bit is not observed by the
1418          * CPU, and some VMware virtual CPUs drop the G bit for all segments.
1419          * So let's synthesize a legal G bit for all segments, this helps
1420          * running KVM nested. It also helps cross-vendor migration, because
1421          * Intel's vmentry has a check on the 'G' bit.
1422          */
1423         var->g = s->limit > 0xfffff;
1424
1425         /*
1426          * AMD's VMCB does not have an explicit unusable field, so emulate it
1427          * for cross vendor migration purposes by "not present"
1428          */
1429         var->unusable = !var->present;
1430
1431         switch (seg) {
1432         case VCPU_SREG_TR:
1433                 /*
1434                  * Work around a bug where the busy flag in the tr selector
1435                  * isn't exposed
1436                  */
1437                 var->type |= 0x2;
1438                 break;
1439         case VCPU_SREG_DS:
1440         case VCPU_SREG_ES:
1441         case VCPU_SREG_FS:
1442         case VCPU_SREG_GS:
1443                 /*
1444                  * The accessed bit must always be set in the segment
1445                  * descriptor cache, although it can be cleared in the
1446                  * descriptor, the cached bit always remains at 1. Since
1447                  * Intel has a check on this, set it here to support
1448                  * cross-vendor migration.
1449                  */
1450                 if (!var->unusable)
1451                         var->type |= 0x1;
1452                 break;
1453         case VCPU_SREG_SS:
1454                 /*
1455                  * On AMD CPUs sometimes the DB bit in the segment
1456                  * descriptor is left as 1, although the whole segment has
1457                  * been made unusable. Clear it here to pass an Intel VMX
1458                  * entry check when cross vendor migrating.
1459                  */
1460                 if (var->unusable)
1461                         var->db = 0;
1462                 /* This is symmetric with svm_set_segment() */
1463                 var->dpl = to_svm(vcpu)->vmcb->save.cpl;
1464                 break;
1465         }
1466 }
1467
1468 static int svm_get_cpl(struct kvm_vcpu *vcpu)
1469 {
1470         struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1471
1472         return save->cpl;
1473 }
1474
1475 static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1476 {
1477         struct vcpu_svm *svm = to_svm(vcpu);
1478
1479         dt->size = svm->vmcb->save.idtr.limit;
1480         dt->address = svm->vmcb->save.idtr.base;
1481 }
1482
1483 static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1484 {
1485         struct vcpu_svm *svm = to_svm(vcpu);
1486
1487         svm->vmcb->save.idtr.limit = dt->size;
1488         svm->vmcb->save.idtr.base = dt->address;
1489         mark_dirty(svm->vmcb, VMCB_DT);
1490 }
1491
1492 static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1493 {
1494         struct vcpu_svm *svm = to_svm(vcpu);
1495
1496         dt->size = svm->vmcb->save.gdtr.limit;
1497         dt->address = svm->vmcb->save.gdtr.base;
1498 }
1499
1500 static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1501 {
1502         struct vcpu_svm *svm = to_svm(vcpu);
1503
1504         svm->vmcb->save.gdtr.limit = dt->size;
1505         svm->vmcb->save.gdtr.base = dt->address;
1506         mark_dirty(svm->vmcb, VMCB_DT);
1507 }
1508
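/*
 * If the guest-visible CR0 and the CR0 kept in the VMCB end up identical,
 * CR0 read/write intercepts are unnecessary and are cleared; otherwise they
 * are set so that accesses to the differing bits can be emulated.
 */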
1509 static void update_cr0_intercept(struct vcpu_svm *svm)
1510 {
1511         ulong gcr0 = svm->vcpu.arch.cr0;
1512         u64 *hcr0 = &svm->vmcb->save.cr0;
1513
1514         *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
1515                 | (gcr0 & SVM_CR0_SELECTIVE_MASK);
1516
1517         mark_dirty(svm->vmcb, VMCB_CR);
1518
1519         if (gcr0 == *hcr0) {
1520                 clr_cr_intercept(svm, INTERCEPT_CR0_READ);
1521                 clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1522         } else {
1523                 set_cr_intercept(svm, INTERCEPT_CR0_READ);
1524                 set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1525         }
1526 }
1527
1528 void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1529 {
1530         struct vcpu_svm *svm = to_svm(vcpu);
1531
1532 #ifdef CONFIG_X86_64
1533         if (vcpu->arch.efer & EFER_LME) {
1534                 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
1535                         vcpu->arch.efer |= EFER_LMA;
1536                         svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
1537                 }
1538
1539                 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
1540                         vcpu->arch.efer &= ~EFER_LMA;
1541                         svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
1542                 }
1543         }
1544 #endif
1545         vcpu->arch.cr0 = cr0;
1546
1547         if (!npt_enabled)
1548                 cr0 |= X86_CR0_PG | X86_CR0_WP;
1549
1550         /*
1551          * re-enable caching here because the QEMU bios
1552          * does not do it - this results in some delay at
1553          * reboot
1554          */
1555         if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
1556                 cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
1557         svm->vmcb->save.cr0 = cr0;
1558         mark_dirty(svm->vmcb, VMCB_CR);
1559         update_cr0_intercept(svm);
1560 }
1561
1562 int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1563 {
1564         unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
1565         unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
1566
1567         if (cr4 & X86_CR4_VMXE)
1568                 return 1;
1569
1570         if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
1571                 svm_flush_tlb(vcpu);
1572
1573         vcpu->arch.cr4 = cr4;
1574         if (!npt_enabled)
1575                 cr4 |= X86_CR4_PAE;
1576         cr4 |= host_cr4_mce;
1577         to_svm(vcpu)->vmcb->save.cr4 = cr4;
1578         mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
1579         return 0;
1580 }
1581
1582 static void svm_set_segment(struct kvm_vcpu *vcpu,
1583                             struct kvm_segment *var, int seg)
1584 {
1585         struct vcpu_svm *svm = to_svm(vcpu);
1586         struct vmcb_seg *s = svm_seg(vcpu, seg);
1587
1588         s->base = var->base;
1589         s->limit = var->limit;
1590         s->selector = var->selector;
1591         s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
1592         s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
1593         s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
1594         s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT;
1595         s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
1596         s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
1597         s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
1598         s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
1599
1600         /*
1601          * This is always accurate, except if SYSRET returned to a segment
1602          * with SS.DPL != 3.  Intel does not have this quirk, and always
1603          * forces SS.DPL to 3 on sysret, so we ignore that case; fixing it
1604          * would entail passing the CPL to userspace and back.
1605          */
1606         if (seg == VCPU_SREG_SS)
1607                 /* This is symmetric with svm_get_segment() */
1608                 svm->vmcb->save.cpl = (var->dpl & 3);
1609
1610         mark_dirty(svm->vmcb, VMCB_SEG);
1611 }
1612
1613 static void update_bp_intercept(struct kvm_vcpu *vcpu)
1614 {
1615         struct vcpu_svm *svm = to_svm(vcpu);
1616
1617         clr_exception_intercept(svm, BP_VECTOR);
1618
1619         if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
1620                 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
1621                         set_exception_intercept(svm, BP_VECTOR);
1622         } else
1623                 vcpu->guest_debug = 0;
1624 }
1625
1626 static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
1627 {
1628         if (sd->next_asid > sd->max_asid) {
1629                 ++sd->asid_generation;
1630                 sd->next_asid = sd->min_asid;
1631                 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
1632         }
1633
1634         svm->asid_generation = sd->asid_generation;
1635         svm->vmcb->control.asid = sd->next_asid++;
1636
1637         mark_dirty(svm->vmcb, VMCB_ASID);
1638 }
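
/*
 * Note on ASID handling: each physical CPU hands out ASIDs from
 * min_asid to max_asid.  When the range is exhausted the per-CPU
 * generation counter is bumped and a full TLB flush is requested via
 * TLB_CONTROL_FLUSH_ALL_ASID, so recycled ASIDs cannot match stale
 * translations.  pre_svm_run() compares svm->asid_generation against
 * the per-CPU generation to decide whether a vCPU needs a fresh ASID.
 */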
1639
1640 static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value)
1641 {
1642         struct vmcb *vmcb = svm->vmcb;
1643
1644         if (unlikely(value != vmcb->save.dr6)) {
1645                 vmcb->save.dr6 = value;
1646                 mark_dirty(vmcb, VMCB_DR);
1647         }
1648 }
1649
1650 static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
1651 {
1652         struct vcpu_svm *svm = to_svm(vcpu);
1653
1654         get_debugreg(vcpu->arch.db[0], 0);
1655         get_debugreg(vcpu->arch.db[1], 1);
1656         get_debugreg(vcpu->arch.db[2], 2);
1657         get_debugreg(vcpu->arch.db[3], 3);
1658         /*
1659          * We cannot reset svm->vmcb->save.dr6 to DR6_FIXED_1|DR6_RTM here,
1660          * because db_interception might need it.  We can do it before vmentry.
1661          */
1662         vcpu->arch.dr6 = svm->vmcb->save.dr6;
1663         vcpu->arch.dr7 = svm->vmcb->save.dr7;
1664         vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
1665         set_dr_intercepts(svm);
1666 }
1667
1668 static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
1669 {
1670         struct vcpu_svm *svm = to_svm(vcpu);
1671
1672         svm->vmcb->save.dr7 = value;
1673         mark_dirty(svm->vmcb, VMCB_DR);
1674 }
1675
1676 static int pf_interception(struct vcpu_svm *svm)
1677 {
1678         u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
1679         u64 error_code = svm->vmcb->control.exit_info_1;
1680
1681         return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address,
1682                         static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
1683                         svm->vmcb->control.insn_bytes : NULL,
1684                         svm->vmcb->control.insn_len);
1685 }
1686
1687 static int npf_interception(struct vcpu_svm *svm)
1688 {
1689         u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
1690         u64 error_code = svm->vmcb->control.exit_info_1;
1691
1692         trace_kvm_page_fault(fault_address, error_code);
1693         return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
1694                         static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
1695                         svm->vmcb->control.insn_bytes : NULL,
1696                         svm->vmcb->control.insn_len);
1697 }
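
/*
 * For both intercepts above, exit_info_1 holds the page-fault error
 * code and exit_info_2 the faulting address (per the APM: the linear
 * address for #PF, the guest-physical address for NPF).  __sme_clr()
 * strips the SME C-bit so the MMU sees a plain address, and with
 * decode assists the CPU also supplies the faulting instruction bytes,
 * sparing the emulator a guest memory fetch.
 */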
1698
1699 static int db_interception(struct vcpu_svm *svm)
1700 {
1701         struct kvm_run *kvm_run = svm->vcpu.run;
1702         struct kvm_vcpu *vcpu = &svm->vcpu;
1703
1704         if (!(svm->vcpu.guest_debug &
1705               (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
1706                 !svm->nmi_singlestep) {
1707                 u32 payload = (svm->vmcb->save.dr6 ^ DR6_RTM) & ~DR6_FIXED_1;
1708                 kvm_queue_exception_p(&svm->vcpu, DB_VECTOR, payload);
1709                 return 1;
1710         }
1711
1712         if (svm->nmi_singlestep) {
1713                 disable_nmi_singlestep(svm);
1714                 /* Make sure we check for pending NMIs upon entry */
1715                 kvm_make_request(KVM_REQ_EVENT, vcpu);
1716         }
1717
1718         if (svm->vcpu.guest_debug &
1719             (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
1720                 kvm_run->exit_reason = KVM_EXIT_DEBUG;
1721                 kvm_run->debug.arch.dr6 = svm->vmcb->save.dr6;
1722                 kvm_run->debug.arch.dr7 = svm->vmcb->save.dr7;
1723                 kvm_run->debug.arch.pc =
1724                         svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1725                 kvm_run->debug.arch.exception = DB_VECTOR;
1726                 return 0;
1727         }
1728
1729         return 1;
1730 }
1731
1732 static int bp_interception(struct vcpu_svm *svm)
1733 {
1734         struct kvm_run *kvm_run = svm->vcpu.run;
1735
1736         kvm_run->exit_reason = KVM_EXIT_DEBUG;
1737         kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1738         kvm_run->debug.arch.exception = BP_VECTOR;
1739         return 0;
1740 }
1741
1742 static int ud_interception(struct vcpu_svm *svm)
1743 {
1744         return handle_ud(&svm->vcpu);
1745 }
1746
1747 static int ac_interception(struct vcpu_svm *svm)
1748 {
1749         kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0);
1750         return 1;
1751 }
1752
1753 static int gp_interception(struct vcpu_svm *svm)
1754 {
1755         struct kvm_vcpu *vcpu = &svm->vcpu;
1756         u32 error_code = svm->vmcb->control.exit_info_1;
1757
1758         WARN_ON_ONCE(!enable_vmware_backdoor);
1759
1760         /*
1761          * VMware backdoor emulation on #GP interception only handles IN{S},
1762          * OUT{S}, and RDPMC, none of which generate a non-zero error code.
1763          */
1764         if (error_code) {
1765                 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
1766                 return 1;
1767         }
1768         return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
1769 }
1770
1771 static bool is_erratum_383(void)
1772 {
1773         int err, i;
1774         u64 value;
1775
1776         if (!erratum_383_found)
1777                 return false;
1778
1779         value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
1780         if (err)
1781                 return false;
1782
1783         /* Bit 62 may or may not be set for this mce */
1784         value &= ~(1ULL << 62);
1785
1786         if (value != 0xb600000000010015ULL)
1787                 return false;
1788
1789         /* Clear MCi_STATUS registers */
1790         for (i = 0; i < 6; ++i)
1791                 native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
1792
1793         value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
1794         if (!err) {
1795                 u32 low, high;
1796
1797                 value &= ~(1ULL << 2);
1798                 low    = lower_32_bits(value);
1799                 high   = upper_32_bits(value);
1800
1801                 native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
1802         }
1803
1804         /* Flush tlb to evict multi-match entries */
1805         __flush_tlb_all();
1806
1807         return true;
1808 }
1809
1810 /*
1811  * Trigger machine check on the host. We assume all the MSRs are already set up
1812  * by the CPU and that we still run on the same CPU as the MCE occurred on.
1813  * We pass a fake environment to the machine check handler because we want
1814  * the guest to always be treated like user space, no matter what
1815  * context it was running in internally.
1816  */
1817 static void kvm_machine_check(void)
1818 {
1819 #if defined(CONFIG_X86_MCE)
1820         struct pt_regs regs = {
1821                 .cs = 3, /* Fake ring 3 no matter what the guest ran on */
1822                 .flags = X86_EFLAGS_IF,
1823         };
1824
1825         do_machine_check(&regs, 0);
1826 #endif
1827 }
1828
1829 static void svm_handle_mce(struct vcpu_svm *svm)
1830 {
1831         if (is_erratum_383()) {
1832                 /*
1833                  * Erratum 383 triggered. Guest state is corrupt so kill the
1834                  * guest.
1835                  */
1836                 pr_err("KVM: Guest triggered AMD Erratum 383\n");
1837
1838                 kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu);
1839
1840                 return;
1841         }
1842
1843         /*
1844          * On an #MC intercept the MCE handler is not called automatically in
1845          * the host. So do it by hand here.
1846          */
1847         kvm_machine_check();
1848 }
1849
1850 static int mc_interception(struct vcpu_svm *svm)
1851 {
1852         return 1;
1853 }
1854
1855 static int shutdown_interception(struct vcpu_svm *svm)
1856 {
1857         struct kvm_run *kvm_run = svm->vcpu.run;
1858
1859         /*
1860          * VMCB is undefined after a SHUTDOWN intercept
1861          * so reinitialize it.
1862          */
1863         clear_page(svm->vmcb);
1864         init_vmcb(svm);
1865
1866         kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
1867         return 0;
1868 }
1869
1870 static int io_interception(struct vcpu_svm *svm)
1871 {
1872         struct kvm_vcpu *vcpu = &svm->vcpu;
1873         u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
1874         int size, in, string;
1875         unsigned port;
1876
1877         ++svm->vcpu.stat.io_exits;
1878         string = (io_info & SVM_IOIO_STR_MASK) != 0;
1879         in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
1880         if (string)
1881                 return kvm_emulate_instruction(vcpu, 0);
1882
1883         port = io_info >> 16;
1884         size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
1885         svm->next_rip = svm->vmcb->control.exit_info_2;
1886
1887         return kvm_fast_pio(&svm->vcpu, size, port, in);
1888 }
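
/*
 * A rough sketch of the IOIO exit_info_1 decode used above (the APM is
 * authoritative): bit 0 gives the direction (1 = IN), bit 2 flags a
 * string instruction, the one-hot size field shifted down by
 * SVM_IOIO_SIZE_SHIFT yields the operand size in bytes (1, 2 or 4),
 * and bits 31:16 carry the port number.  exit_info_2 holds the rIP of
 * the instruction following IN/OUT, which is why it can be used
 * directly as next_rip for the fast-path skip.
 */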
1889
1890 static int nmi_interception(struct vcpu_svm *svm)
1891 {
1892         return 1;
1893 }
1894
1895 static int intr_interception(struct vcpu_svm *svm)
1896 {
1897         ++svm->vcpu.stat.irq_exits;
1898         return 1;
1899 }
1900
1901 static int nop_on_interception(struct vcpu_svm *svm)
1902 {
1903         return 1;
1904 }
1905
1906 static int halt_interception(struct vcpu_svm *svm)
1907 {
1908         return kvm_emulate_halt(&svm->vcpu);
1909 }
1910
1911 static int vmmcall_interception(struct vcpu_svm *svm)
1912 {
1913         return kvm_emulate_hypercall(&svm->vcpu);
1914 }
1915
1916 static int vmload_interception(struct vcpu_svm *svm)
1917 {
1918         struct vmcb *nested_vmcb;
1919         struct kvm_host_map map;
1920         int ret;
1921
1922         if (nested_svm_check_permissions(svm))
1923                 return 1;
1924
1925         ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map);
1926         if (ret) {
1927                 if (ret == -EINVAL)
1928                         kvm_inject_gp(&svm->vcpu, 0);
1929                 return 1;
1930         }
1931
1932         nested_vmcb = map.hva;
1933
1934         ret = kvm_skip_emulated_instruction(&svm->vcpu);
1935
1936         nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
1937         kvm_vcpu_unmap(&svm->vcpu, &map, true);
1938
1939         return ret;
1940 }
1941
1942 static int vmsave_interception(struct vcpu_svm *svm)
1943 {
1944         struct vmcb *nested_vmcb;
1945         struct kvm_host_map map;
1946         int ret;
1947
1948         if (nested_svm_check_permissions(svm))
1949                 return 1;
1950
1951         ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map);
1952         if (ret) {
1953                 if (ret == -EINVAL)
1954                         kvm_inject_gp(&svm->vcpu, 0);
1955                 return 1;
1956         }
1957
1958         nested_vmcb = map.hva;
1959
1960         ret = kvm_skip_emulated_instruction(&svm->vcpu);
1961
1962         nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
1963         kvm_vcpu_unmap(&svm->vcpu, &map, true);
1964
1965         return ret;
1966 }
1967
1968 static int vmrun_interception(struct vcpu_svm *svm)
1969 {
1970         if (nested_svm_check_permissions(svm))
1971                 return 1;
1972
1973         return nested_svm_vmrun(svm);
1974 }
1975
1976 static int stgi_interception(struct vcpu_svm *svm)
1977 {
1978         int ret;
1979
1980         if (nested_svm_check_permissions(svm))
1981                 return 1;
1982
1983         /*
1984          * If VGIF is enabled, the STGI intercept is only added to
1985          * detect the opening of the SMI/NMI window; remove it now.
1986          */
1987         if (vgif_enabled(svm))
1988                 clr_intercept(svm, INTERCEPT_STGI);
1989
1990         ret = kvm_skip_emulated_instruction(&svm->vcpu);
1991         kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
1992
1993         enable_gif(svm);
1994
1995         return ret;
1996 }
1997
1998 static int clgi_interception(struct vcpu_svm *svm)
1999 {
2000         int ret;
2001
2002         if (nested_svm_check_permissions(svm))
2003                 return 1;
2004
2005         ret = kvm_skip_emulated_instruction(&svm->vcpu);
2006
2007         disable_gif(svm);
2008
2009         /* After a CLGI no interrupts can be delivered until GIF is set */
2010         if (!kvm_vcpu_apicv_active(&svm->vcpu))
2011                 svm_clear_vintr(svm);
2012
2013         return ret;
2014 }
2015
2016 static int invlpga_interception(struct vcpu_svm *svm)
2017 {
2018         struct kvm_vcpu *vcpu = &svm->vcpu;
2019
2020         trace_kvm_invlpga(svm->vmcb->save.rip, kvm_rcx_read(&svm->vcpu),
2021                           kvm_rax_read(&svm->vcpu));
2022
2023         /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
2024         kvm_mmu_invlpg(vcpu, kvm_rax_read(&svm->vcpu));
2025
2026         return kvm_skip_emulated_instruction(&svm->vcpu);
2027 }
2028
2029 static int skinit_interception(struct vcpu_svm *svm)
2030 {
2031         trace_kvm_skinit(svm->vmcb->save.rip, kvm_rax_read(&svm->vcpu));
2032
2033         kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2034         return 1;
2035 }
2036
2037 static int wbinvd_interception(struct vcpu_svm *svm)
2038 {
2039         return kvm_emulate_wbinvd(&svm->vcpu);
2040 }
2041
2042 static int xsetbv_interception(struct vcpu_svm *svm)
2043 {
2044         u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
2045         u32 index = kvm_rcx_read(&svm->vcpu);
2046
2047         if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
2048                 return kvm_skip_emulated_instruction(&svm->vcpu);
2049         }
2050
2051         return 1;
2052 }
2053
2054 static int rdpru_interception(struct vcpu_svm *svm)
2055 {
2056         kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2057         return 1;
2058 }
2059
2060 static int task_switch_interception(struct vcpu_svm *svm)
2061 {
2062         u16 tss_selector;
2063         int reason;
2064         int int_type = svm->vmcb->control.exit_int_info &
2065                 SVM_EXITINTINFO_TYPE_MASK;
2066         int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
2067         uint32_t type =
2068                 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
2069         uint32_t idt_v =
2070                 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
2071         bool has_error_code = false;
2072         u32 error_code = 0;
2073
2074         tss_selector = (u16)svm->vmcb->control.exit_info_1;
2075
2076         if (svm->vmcb->control.exit_info_2 &
2077             (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
2078                 reason = TASK_SWITCH_IRET;
2079         else if (svm->vmcb->control.exit_info_2 &
2080                  (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
2081                 reason = TASK_SWITCH_JMP;
2082         else if (idt_v)
2083                 reason = TASK_SWITCH_GATE;
2084         else
2085                 reason = TASK_SWITCH_CALL;
2086
2087         if (reason == TASK_SWITCH_GATE) {
2088                 switch (type) {
2089                 case SVM_EXITINTINFO_TYPE_NMI:
2090                         svm->vcpu.arch.nmi_injected = false;
2091                         break;
2092                 case SVM_EXITINTINFO_TYPE_EXEPT:
2093                         if (svm->vmcb->control.exit_info_2 &
2094                             (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) {
2095                                 has_error_code = true;
2096                                 error_code =
2097                                         (u32)svm->vmcb->control.exit_info_2;
2098                         }
2099                         kvm_clear_exception_queue(&svm->vcpu);
2100                         break;
2101                 case SVM_EXITINTINFO_TYPE_INTR:
2102                         kvm_clear_interrupt_queue(&svm->vcpu);
2103                         break;
2104                 default:
2105                         break;
2106                 }
2107         }
2108
2109         if (reason != TASK_SWITCH_GATE ||
2110             int_type == SVM_EXITINTINFO_TYPE_SOFT ||
2111             (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
2112              (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
2113                 if (!skip_emulated_instruction(&svm->vcpu))
2114                         return 0;
2115         }
2116
2117         if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
2118                 int_vec = -1;
2119
2120         return kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
2121                                has_error_code, error_code);
2122 }
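
/*
 * For the task-switch intercept above: the low 16 bits of exit_info_1
 * carry the target TSS selector.  exit_info_2 has flag bits for
 * switches initiated by IRET or far JMP and for the presence of an
 * error code; the gate case is inferred from a valid exit_int_info,
 * which also describes the event that was being delivered so it can
 * be re-queued or dropped as appropriate.
 */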
2123
2124 static int cpuid_interception(struct vcpu_svm *svm)
2125 {
2126         return kvm_emulate_cpuid(&svm->vcpu);
2127 }
2128
2129 static int iret_interception(struct vcpu_svm *svm)
2130 {
2131         ++svm->vcpu.stat.nmi_window_exits;
2132         clr_intercept(svm, INTERCEPT_IRET);
2133         svm->vcpu.arch.hflags |= HF_IRET_MASK;
2134         svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
2135         kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
2136         return 1;
2137 }
2138
2139 static int invlpg_interception(struct vcpu_svm *svm)
2140 {
2141         if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
2142                 return kvm_emulate_instruction(&svm->vcpu, 0);
2143
2144         kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
2145         return kvm_skip_emulated_instruction(&svm->vcpu);
2146 }
2147
2148 static int emulate_on_interception(struct vcpu_svm *svm)
2149 {
2150         return kvm_emulate_instruction(&svm->vcpu, 0);
2151 }
2152
2153 static int rsm_interception(struct vcpu_svm *svm)
2154 {
2155         return kvm_emulate_instruction_from_buffer(&svm->vcpu, rsm_ins_bytes, 2);
2156 }
2157
2158 static int rdpmc_interception(struct vcpu_svm *svm)
2159 {
2160         int err;
2161
2162         if (!nrips)
2163                 return emulate_on_interception(svm);
2164
2165         err = kvm_rdpmc(&svm->vcpu);
2166         return kvm_complete_insn_gp(&svm->vcpu, err);
2167 }
2168
2169 static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
2170                                             unsigned long val)
2171 {
2172         unsigned long cr0 = svm->vcpu.arch.cr0;
2173         bool ret = false;
2174         u64 intercept;
2175
2176         intercept = svm->nested.intercept;
2177
2178         if (!is_guest_mode(&svm->vcpu) ||
2179             (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
2180                 return false;
2181
2182         cr0 &= ~SVM_CR0_SELECTIVE_MASK;
2183         val &= ~SVM_CR0_SELECTIVE_MASK;
2184
2185         if (cr0 ^ val) {
2186                 svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
2187                 ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
2188         }
2189
2190         return ret;
2191 }
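
/*
 * The helper above implements the "selective CR0 write" semantics for
 * a nested guest: writes that only touch bits inside
 * SVM_CR0_SELECTIVE_MASK are handled locally, while any other change
 * is turned into an SVM_EXIT_CR0_SEL_WRITE exit and offered to the L1
 * hypervisor if it requested INTERCEPT_SELECTIVE_CR0.
 */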
2192
2193 #define CR_VALID (1ULL << 63)
2194
2195 static int cr_interception(struct vcpu_svm *svm)
2196 {
2197         int reg, cr;
2198         unsigned long val;
2199         int err;
2200
2201         if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
2202                 return emulate_on_interception(svm);
2203
2204         if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0))
2205                 return emulate_on_interception(svm);
2206
2207         reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
2208         if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE)
2209                 cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0;
2210         else
2211                 cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
2212
2213         err = 0;
2214         if (cr >= 16) { /* mov to cr */
2215                 cr -= 16;
2216                 val = kvm_register_read(&svm->vcpu, reg);
2217                 switch (cr) {
2218                 case 0:
2219                         if (!check_selective_cr0_intercepted(svm, val))
2220                                 err = kvm_set_cr0(&svm->vcpu, val);
2221                         else
2222                                 return 1;
2223
2224                         break;
2225                 case 3:
2226                         err = kvm_set_cr3(&svm->vcpu, val);
2227                         break;
2228                 case 4:
2229                         err = kvm_set_cr4(&svm->vcpu, val);
2230                         break;
2231                 case 8:
2232                         err = kvm_set_cr8(&svm->vcpu, val);
2233                         break;
2234                 default:
2235                         WARN(1, "unhandled write to CR%d", cr);
2236                         kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2237                         return 1;
2238                 }
2239         } else { /* mov from cr */
2240                 switch (cr) {
2241                 case 0:
2242                         val = kvm_read_cr0(&svm->vcpu);
2243                         break;
2244                 case 2:
2245                         val = svm->vcpu.arch.cr2;
2246                         break;
2247                 case 3:
2248                         val = kvm_read_cr3(&svm->vcpu);
2249                         break;
2250                 case 4:
2251                         val = kvm_read_cr4(&svm->vcpu);
2252                         break;
2253                 case 8:
2254                         val = kvm_get_cr8(&svm->vcpu);
2255                         break;
2256                 default:
2257                         WARN(1, "unhandled read from CR%d", cr);
2258                         kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2259                         return 1;
2260                 }
2261                 kvm_register_write(&svm->vcpu, reg, val);
2262         }
2263         return kvm_complete_insn_gp(&svm->vcpu, err);
2264 }
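
/*
 * Note on the decode above: CR exit codes come in a read bank and a
 * write bank, so after subtracting SVM_EXIT_READ_CR0 a value >= 16
 * means "mov to CRn" and the CR number is recovered by subtracting 16
 * again; SVM_EXIT_CR0_SEL_WRITE is folded into the plain CR0 write
 * case.  With decode assists, exit_info_1 supplies the GPR number and
 * the CR_VALID bit; without them KVM falls back to full emulation.
 */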
2265
2266 static int dr_interception(struct vcpu_svm *svm)
2267 {
2268         int reg, dr;
2269         unsigned long val;
2270
2271         if (svm->vcpu.guest_debug == 0) {
2272                 /*
2273                  * No more DR vmexits; force a reload of the debug registers
2274                  * and reenter on this instruction.  The next vmexit will
2275                  * retrieve the full state of the debug registers.
2276                  */
2277                 clr_dr_intercepts(svm);
2278                 svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
2279                 return 1;
2280         }
2281
2282         if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
2283                 return emulate_on_interception(svm);
2284
2285         reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
2286         dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
2287
2288         if (dr >= 16) { /* mov to DRn */
2289                 if (!kvm_require_dr(&svm->vcpu, dr - 16))
2290                         return 1;
2291                 val = kvm_register_read(&svm->vcpu, reg);
2292                 kvm_set_dr(&svm->vcpu, dr - 16, val);
2293         } else {
2294                 if (!kvm_require_dr(&svm->vcpu, dr))
2295                         return 1;
2296                 kvm_get_dr(&svm->vcpu, dr, &val);
2297                 kvm_register_write(&svm->vcpu, reg, val);
2298         }
2299
2300         return kvm_skip_emulated_instruction(&svm->vcpu);
2301 }
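
/*
 * Debug registers are handled lazily: when userspace is not debugging
 * the guest, the first DR access clears the DR intercepts and sets
 * KVM_DEBUGREG_WONT_EXIT so further accesses hit the hardware
 * registers directly.  svm_sync_dirty_debug_regs() later reads the
 * registers back and re-arms the intercepts.  The read/write bank
 * encoding mirrors the CR case: dr >= 16 means "mov to DRn".
 */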
2302
2303 static int cr8_write_interception(struct vcpu_svm *svm)
2304 {
2305         struct kvm_run *kvm_run = svm->vcpu.run;
2306         int r;
2307
2308         u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
2309         /* instruction emulation calls kvm_set_cr8() */
2310         r = cr_interception(svm);
2311         if (lapic_in_kernel(&svm->vcpu))
2312                 return r;
2313         if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
2314                 return r;
2315         kvm_run->exit_reason = KVM_EXIT_SET_TPR;
2316         return 0;
2317 }
2318
2319 static int svm_get_msr_feature(struct kvm_msr_entry *msr)
2320 {
2321         msr->data = 0;
2322
2323         switch (msr->index) {
2324         case MSR_F10H_DECFG:
2325                 if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
2326                         msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
2327                 break;
2328         default:
2329                 return 1;
2330         }
2331
2332         return 0;
2333 }
2334
2335 static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2336 {
2337         struct vcpu_svm *svm = to_svm(vcpu);
2338
2339         switch (msr_info->index) {
2340         case MSR_STAR:
2341                 msr_info->data = svm->vmcb->save.star;
2342                 break;
2343 #ifdef CONFIG_X86_64
2344         case MSR_LSTAR:
2345                 msr_info->data = svm->vmcb->save.lstar;
2346                 break;
2347         case MSR_CSTAR:
2348                 msr_info->data = svm->vmcb->save.cstar;
2349                 break;
2350         case MSR_KERNEL_GS_BASE:
2351                 msr_info->data = svm->vmcb->save.kernel_gs_base;
2352                 break;
2353         case MSR_SYSCALL_MASK:
2354                 msr_info->data = svm->vmcb->save.sfmask;
2355                 break;
2356 #endif
2357         case MSR_IA32_SYSENTER_CS:
2358                 msr_info->data = svm->vmcb->save.sysenter_cs;
2359                 break;
2360         case MSR_IA32_SYSENTER_EIP:
2361                 msr_info->data = svm->sysenter_eip;
2362                 break;
2363         case MSR_IA32_SYSENTER_ESP:
2364                 msr_info->data = svm->sysenter_esp;
2365                 break;
2366         case MSR_TSC_AUX:
2367                 if (!boot_cpu_has(X86_FEATURE_RDTSCP))
2368                         return 1;
2369                 msr_info->data = svm->tsc_aux;
2370                 break;
2371         /*
2372          * Nobody will change the following 5 values in the VMCB so we can
2373          * safely return them on rdmsr. They will always be 0 until LBRV is
2374          * implemented.
2375          */
2376         case MSR_IA32_DEBUGCTLMSR:
2377                 msr_info->data = svm->vmcb->save.dbgctl;
2378                 break;
2379         case MSR_IA32_LASTBRANCHFROMIP:
2380                 msr_info->data = svm->vmcb->save.br_from;
2381                 break;
2382         case MSR_IA32_LASTBRANCHTOIP:
2383                 msr_info->data = svm->vmcb->save.br_to;
2384                 break;
2385         case MSR_IA32_LASTINTFROMIP:
2386                 msr_info->data = svm->vmcb->save.last_excp_from;
2387                 break;
2388         case MSR_IA32_LASTINTTOIP:
2389                 msr_info->data = svm->vmcb->save.last_excp_to;
2390                 break;
2391         case MSR_VM_HSAVE_PA:
2392                 msr_info->data = svm->nested.hsave_msr;
2393                 break;
2394         case MSR_VM_CR:
2395                 msr_info->data = svm->nested.vm_cr_msr;
2396                 break;
2397         case MSR_IA32_SPEC_CTRL:
2398                 if (!msr_info->host_initiated &&
2399                     !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
2400                     !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) &&
2401                     !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
2402                     !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
2403                         return 1;
2404
2405                 msr_info->data = svm->spec_ctrl;
2406                 break;
2407         case MSR_AMD64_VIRT_SPEC_CTRL:
2408                 if (!msr_info->host_initiated &&
2409                     !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
2410                         return 1;
2411
2412                 msr_info->data = svm->virt_spec_ctrl;
2413                 break;
2414         case MSR_F15H_IC_CFG: {
2415
2416                 int family, model;
2417
2418                 family = guest_cpuid_family(vcpu);
2419                 model  = guest_cpuid_model(vcpu);
2420
2421                 if (family < 0 || model < 0)
2422                         return kvm_get_msr_common(vcpu, msr_info);
2423
2424                 msr_info->data = 0;
2425
2426                 if (family == 0x15 &&
2427                     (model >= 0x2 && model < 0x20))
2428                         msr_info->data = 0x1E;
2429                 }
2430                 break;
2431         case MSR_F10H_DECFG:
2432                 msr_info->data = svm->msr_decfg;
2433                 break;
2434         default:
2435                 return kvm_get_msr_common(vcpu, msr_info);
2436         }
2437         return 0;
2438 }
2439
2440 static int rdmsr_interception(struct vcpu_svm *svm)
2441 {
2442         return kvm_emulate_rdmsr(&svm->vcpu);
2443 }
2444
2445 static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
2446 {
2447         struct vcpu_svm *svm = to_svm(vcpu);
2448         int svm_dis, chg_mask;
2449
2450         if (data & ~SVM_VM_CR_VALID_MASK)
2451                 return 1;
2452
2453         chg_mask = SVM_VM_CR_VALID_MASK;
2454
2455         if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
2456                 chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);
2457
2458         svm->nested.vm_cr_msr &= ~chg_mask;
2459         svm->nested.vm_cr_msr |= (data & chg_mask);
2460
2461         svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK;
2462
2463         /* check for svm_disable while efer.svme is set */
2464         if (svm_dis && (vcpu->arch.efer & EFER_SVME))
2465                 return 1;
2466
2467         return 0;
2468 }
2469
2470 static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
2471 {
2472         struct vcpu_svm *svm = to_svm(vcpu);
2473
2474         u32 ecx = msr->index;
2475         u64 data = msr->data;
2476         switch (ecx) {
2477         case MSR_IA32_CR_PAT:
2478                 if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
2479                         return 1;
2480                 vcpu->arch.pat = data;
2481                 svm->vmcb->save.g_pat = data;
2482                 mark_dirty(svm->vmcb, VMCB_NPT);
2483                 break;
2484         case MSR_IA32_SPEC_CTRL:
2485                 if (!msr->host_initiated &&
2486                     !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) &&
2487                     !guest_cpuid_has(vcpu, X86_FEATURE_AMD_STIBP) &&
2488                     !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
2489                     !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
2490                         return 1;
2491
2492                 if (data & ~kvm_spec_ctrl_valid_bits(vcpu))
2493                         return 1;
2494
2495                 svm->spec_ctrl = data;
2496                 if (!data)
2497                         break;
2498
2499                 /*
2500                  * For non-nested:
2501                  * When it's written (to non-zero) for the first time, pass
2502                  * it through.
2503                  *
2504                  * For nested:
2505                  * The handling of the MSR bitmap for L2 guests is done in
2506                  * nested_svm_vmrun_msrpm.
2507                  * We update the L1 MSR bit as well since it will end up
2508                  * touching the MSR anyway now.
2509                  */
2510                 set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
2511                 break;
2512         case MSR_IA32_PRED_CMD:
2513                 if (!msr->host_initiated &&
2514                     !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB))
2515                         return 1;
2516
2517                 if (data & ~PRED_CMD_IBPB)
2518                         return 1;
2519                 if (!boot_cpu_has(X86_FEATURE_AMD_IBPB))
2520                         return 1;
2521                 if (!data)
2522                         break;
2523
2524                 wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
2525                 set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
2526                 break;
2527         case MSR_AMD64_VIRT_SPEC_CTRL:
2528                 if (!msr->host_initiated &&
2529                     !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
2530                         return 1;
2531
2532                 if (data & ~SPEC_CTRL_SSBD)
2533                         return 1;
2534
2535                 svm->virt_spec_ctrl = data;
2536                 break;
2537         case MSR_STAR:
2538                 svm->vmcb->save.star = data;
2539                 break;
2540 #ifdef CONFIG_X86_64
2541         case MSR_LSTAR:
2542                 svm->vmcb->save.lstar = data;
2543                 break;
2544         case MSR_CSTAR:
2545                 svm->vmcb->save.cstar = data;
2546                 break;
2547         case MSR_KERNEL_GS_BASE:
2548                 svm->vmcb->save.kernel_gs_base = data;
2549                 break;
2550         case MSR_SYSCALL_MASK:
2551                 svm->vmcb->save.sfmask = data;
2552                 break;
2553 #endif
2554         case MSR_IA32_SYSENTER_CS:
2555                 svm->vmcb->save.sysenter_cs = data;
2556                 break;
2557         case MSR_IA32_SYSENTER_EIP:
2558                 svm->sysenter_eip = data;
2559                 svm->vmcb->save.sysenter_eip = data;
2560                 break;
2561         case MSR_IA32_SYSENTER_ESP:
2562                 svm->sysenter_esp = data;
2563                 svm->vmcb->save.sysenter_esp = data;
2564                 break;
2565         case MSR_TSC_AUX:
2566                 if (!boot_cpu_has(X86_FEATURE_RDTSCP))
2567                         return 1;
2568
2569                 /*
2570                  * This is rare, so we update the MSR here instead of using
2571                  * direct_access_msrs.  Doing that would require a rdmsr in
2572                  * svm_vcpu_put.
2573                  */
2574                 svm->tsc_aux = data;
2575                 wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
2576                 break;
2577         case MSR_IA32_DEBUGCTLMSR:
2578                 if (!boot_cpu_has(X86_FEATURE_LBRV)) {
2579                         vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
2580                                     __func__, data);
2581                         break;
2582                 }
2583                 if (data & DEBUGCTL_RESERVED_BITS)
2584                         return 1;
2585
2586                 svm->vmcb->save.dbgctl = data;
2587                 mark_dirty(svm->vmcb, VMCB_LBR);
2588                 if (data & (1ULL<<0))
2589                         svm_enable_lbrv(svm);
2590                 else
2591                         svm_disable_lbrv(svm);
2592                 break;
2593         case MSR_VM_HSAVE_PA:
2594                 svm->nested.hsave_msr = data;
2595                 break;
2596         case MSR_VM_CR:
2597                 return svm_set_vm_cr(vcpu, data);
2598         case MSR_VM_IGNNE:
2599                 vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
2600                 break;
2601         case MSR_F10H_DECFG: {
2602                 struct kvm_msr_entry msr_entry;
2603
2604                 msr_entry.index = msr->index;
2605                 if (svm_get_msr_feature(&msr_entry))
2606                         return 1;
2607
2608                 /* Check the supported bits */
2609                 if (data & ~msr_entry.data)
2610                         return 1;
2611
2612                 /* Don't allow the guest to change a bit, #GP */
2613                 if (!msr->host_initiated && (data ^ msr_entry.data))
2614                         return 1;
2615
2616                 svm->msr_decfg = data;
2617                 break;
2618         }
2619         case MSR_IA32_APICBASE:
2620                 if (kvm_vcpu_apicv_active(vcpu))
2621                         avic_update_vapic_bar(to_svm(vcpu), data);
2622                 /* Fall through */
2623         default:
2624                 return kvm_set_msr_common(vcpu, msr);
2625         }
2626         return 0;
2627 }
2628
2629 static int wrmsr_interception(struct vcpu_svm *svm)
2630 {
2631         return kvm_emulate_wrmsr(&svm->vcpu);
2632 }
2633
2634 static int msr_interception(struct vcpu_svm *svm)
2635 {
2636         if (svm->vmcb->control.exit_info_1)
2637                 return wrmsr_interception(svm);
2638         else
2639                 return rdmsr_interception(svm);
2640 }
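
/*
 * For the MSR intercept, exit_info_1 distinguishes the two directions
 * (0 for RDMSR, 1 for WRMSR, per the APM), which is all the dispatch
 * above needs.
 */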
2641
2642 static int interrupt_window_interception(struct vcpu_svm *svm)
2643 {
2644         kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
2645         svm_clear_vintr(svm);
2646
2647         /*
2648          * For AVIC, the only reason to end up here is ExtINTs.
2649          * In this case AVIC was temporarily disabled for
2650          * requesting the IRQ window and we have to re-enable it.
2651          */
2652         svm_toggle_avic_for_irq_window(&svm->vcpu, true);
2653
2654         ++svm->vcpu.stat.irq_window_exits;
2655         return 1;
2656 }
2657
2658 static int pause_interception(struct vcpu_svm *svm)
2659 {
2660         struct kvm_vcpu *vcpu = &svm->vcpu;
2661         bool in_kernel = (svm_get_cpl(vcpu) == 0);
2662
2663         if (pause_filter_thresh)
2664                 grow_ple_window(vcpu);
2665
2666         kvm_vcpu_on_spin(vcpu, in_kernel);
2667         return 1;
2668 }
2669
2670 static int nop_interception(struct vcpu_svm *svm)
2671 {
2672         return kvm_skip_emulated_instruction(&(svm->vcpu));
2673 }
2674
2675 static int monitor_interception(struct vcpu_svm *svm)
2676 {
2677         printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
2678         return nop_interception(svm);
2679 }
2680
2681 static int mwait_interception(struct vcpu_svm *svm)
2682 {
2683         printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
2684         return nop_interception(svm);
2685 }
2686
2687 static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
2688         [SVM_EXIT_READ_CR0]                     = cr_interception,
2689         [SVM_EXIT_READ_CR3]                     = cr_interception,
2690         [SVM_EXIT_READ_CR4]                     = cr_interception,
2691         [SVM_EXIT_READ_CR8]                     = cr_interception,
2692         [SVM_EXIT_CR0_SEL_WRITE]                = cr_interception,
2693         [SVM_EXIT_WRITE_CR0]                    = cr_interception,
2694         [SVM_EXIT_WRITE_CR3]                    = cr_interception,
2695         [SVM_EXIT_WRITE_CR4]                    = cr_interception,
2696         [SVM_EXIT_WRITE_CR8]                    = cr8_write_interception,
2697         [SVM_EXIT_READ_DR0]                     = dr_interception,
2698         [SVM_EXIT_READ_DR1]                     = dr_interception,
2699         [SVM_EXIT_READ_DR2]                     = dr_interception,
2700         [SVM_EXIT_READ_DR3]                     = dr_interception,
2701         [SVM_EXIT_READ_DR4]                     = dr_interception,
2702         [SVM_EXIT_READ_DR5]                     = dr_interception,
2703         [SVM_EXIT_READ_DR6]                     = dr_interception,
2704         [SVM_EXIT_READ_DR7]                     = dr_interception,
2705         [SVM_EXIT_WRITE_DR0]                    = dr_interception,
2706         [SVM_EXIT_WRITE_DR1]                    = dr_interception,
2707         [SVM_EXIT_WRITE_DR2]                    = dr_interception,
2708         [SVM_EXIT_WRITE_DR3]                    = dr_interception,
2709         [SVM_EXIT_WRITE_DR4]                    = dr_interception,
2710         [SVM_EXIT_WRITE_DR5]                    = dr_interception,
2711         [SVM_EXIT_WRITE_DR6]                    = dr_interception,
2712         [SVM_EXIT_WRITE_DR7]                    = dr_interception,
2713         [SVM_EXIT_EXCP_BASE + DB_VECTOR]        = db_interception,
2714         [SVM_EXIT_EXCP_BASE + BP_VECTOR]        = bp_interception,
2715         [SVM_EXIT_EXCP_BASE + UD_VECTOR]        = ud_interception,
2716         [SVM_EXIT_EXCP_BASE + PF_VECTOR]        = pf_interception,
2717         [SVM_EXIT_EXCP_BASE + MC_VECTOR]        = mc_interception,
2718         [SVM_EXIT_EXCP_BASE + AC_VECTOR]        = ac_interception,
2719         [SVM_EXIT_EXCP_BASE + GP_VECTOR]        = gp_interception,
2720         [SVM_EXIT_INTR]                         = intr_interception,
2721         [SVM_EXIT_NMI]                          = nmi_interception,
2722         [SVM_EXIT_SMI]                          = nop_on_interception,
2723         [SVM_EXIT_INIT]                         = nop_on_interception,
2724         [SVM_EXIT_VINTR]                        = interrupt_window_interception,
2725         [SVM_EXIT_RDPMC]                        = rdpmc_interception,
2726         [SVM_EXIT_CPUID]                        = cpuid_interception,
2727         [SVM_EXIT_IRET]                         = iret_interception,
2728         [SVM_EXIT_INVD]                         = emulate_on_interception,
2729         [SVM_EXIT_PAUSE]                        = pause_interception,
2730         [SVM_EXIT_HLT]                          = halt_interception,
2731         [SVM_EXIT_INVLPG]                       = invlpg_interception,
2732         [SVM_EXIT_INVLPGA]                      = invlpga_interception,
2733         [SVM_EXIT_IOIO]                         = io_interception,
2734         [SVM_EXIT_MSR]                          = msr_interception,
2735         [SVM_EXIT_TASK_SWITCH]                  = task_switch_interception,
2736         [SVM_EXIT_SHUTDOWN]                     = shutdown_interception,
2737         [SVM_EXIT_VMRUN]                        = vmrun_interception,
2738         [SVM_EXIT_VMMCALL]                      = vmmcall_interception,
2739         [SVM_EXIT_VMLOAD]                       = vmload_interception,
2740         [SVM_EXIT_VMSAVE]                       = vmsave_interception,
2741         [SVM_EXIT_STGI]                         = stgi_interception,
2742         [SVM_EXIT_CLGI]                         = clgi_interception,
2743         [SVM_EXIT_SKINIT]                       = skinit_interception,
2744         [SVM_EXIT_WBINVD]                       = wbinvd_interception,
2745         [SVM_EXIT_MONITOR]                      = monitor_interception,
2746         [SVM_EXIT_MWAIT]                        = mwait_interception,
2747         [SVM_EXIT_XSETBV]                       = xsetbv_interception,
2748         [SVM_EXIT_RDPRU]                        = rdpru_interception,
2749         [SVM_EXIT_NPF]                          = npf_interception,
2750         [SVM_EXIT_RSM]                          = rsm_interception,
2751         [SVM_EXIT_AVIC_INCOMPLETE_IPI]          = avic_incomplete_ipi_interception,
2752         [SVM_EXIT_AVIC_UNACCELERATED_ACCESS]    = avic_unaccelerated_access_interception,
2753 };
2754
2755 static void dump_vmcb(struct kvm_vcpu *vcpu)
2756 {
2757         struct vcpu_svm *svm = to_svm(vcpu);
2758         struct vmcb_control_area *control = &svm->vmcb->control;
2759         struct vmcb_save_area *save = &svm->vmcb->save;
2760
2761         if (!dump_invalid_vmcb) {
2762                 pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
2763                 return;
2764         }
2765
2766         pr_err("VMCB Control Area:\n");
2767         pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
2768         pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
2769         pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff);
2770         pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16);
2771         pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
2772         pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
2773         pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
2774         pr_err("%-20s%d\n", "pause filter threshold:",
2775                control->pause_filter_thresh);
2776         pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
2777         pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
2778         pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
2779         pr_err("%-20s%d\n", "asid:", control->asid);
2780         pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
2781         pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
2782         pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
2783         pr_err("%-20s%08x\n", "int_state:", control->int_state);
2784         pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
2785         pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
2786         pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
2787         pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
2788         pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
2789         pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
2790         pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
2791         pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
2792         pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
2793         pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
2794         pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext);
2795         pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
2796         pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
2797         pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
2798         pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id);
2799         pr_err("VMCB State Save Area:\n");
2800         pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
2801                "es:",
2802                save->es.selector, save->es.attrib,
2803                save->es.limit, save->es.base);
2804         pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
2805                "cs:",
2806                save->cs.selector, save->cs.attrib,
2807                save->cs.limit, save->cs.base);
2808         pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
2809                "ss:",
2810                save->ss.selector, save->ss.attrib,
2811                save->ss.limit, save->ss.base);
2812         pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
2813                "ds:",
2814                save->ds.selector, save->ds.attrib,
2815                save->ds.limit, save->ds.base);
2816         pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
2817                "fs:",
2818                save->fs.selector, save->fs.attrib,
2819                save->fs.limit, save->fs.base);
2820         pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
2821                "gs:",
2822                save->gs.selector, save->gs.attrib,
2823                save->gs.limit, save->gs.base);
2824         pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
2825                "gdtr:",
2826                save->gdtr.selector, save->gdtr.attrib,
2827                save->gdtr.limit, save->gdtr.base);
2828         pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
2829                "ldtr:",
2830                save->ldtr.selector, save->ldtr.attrib,
2831                save->ldtr.limit, save->ldtr.base);
2832         pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
2833                "idtr:",
2834                save->idtr.selector, save->idtr.attrib,
2835                save->idtr.limit, save->idtr.base);
2836         pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
2837                "tr:",
2838                save->tr.selector, save->tr.attrib,
2839                save->tr.limit, save->tr.base);
2840         pr_err("cpl:            %d                efer:         %016llx\n",
2841                 save->cpl, save->efer);
2842         pr_err("%-15s %016llx %-13s %016llx\n",
2843                "cr0:", save->cr0, "cr2:", save->cr2);
2844         pr_err("%-15s %016llx %-13s %016llx\n",
2845                "cr3:", save->cr3, "cr4:", save->cr4);
2846         pr_err("%-15s %016llx %-13s %016llx\n",
2847                "dr6:", save->dr6, "dr7:", save->dr7);
2848         pr_err("%-15s %016llx %-13s %016llx\n",
2849                "rip:", save->rip, "rflags:", save->rflags);
2850         pr_err("%-15s %016llx %-13s %016llx\n",
2851                "rsp:", save->rsp, "rax:", save->rax);
2852         pr_err("%-15s %016llx %-13s %016llx\n",
2853                "star:", save->star, "lstar:", save->lstar);
2854         pr_err("%-15s %016llx %-13s %016llx\n",
2855                "cstar:", save->cstar, "sfmask:", save->sfmask);
2856         pr_err("%-15s %016llx %-13s %016llx\n",
2857                "kernel_gs_base:", save->kernel_gs_base,
2858                "sysenter_cs:", save->sysenter_cs);
2859         pr_err("%-15s %016llx %-13s %016llx\n",
2860                "sysenter_esp:", save->sysenter_esp,
2861                "sysenter_eip:", save->sysenter_eip);
2862         pr_err("%-15s %016llx %-13s %016llx\n",
2863                "gpat:", save->g_pat, "dbgctl:", save->dbgctl);
2864         pr_err("%-15s %016llx %-13s %016llx\n",
2865                "br_from:", save->br_from, "br_to:", save->br_to);
2866         pr_err("%-15s %016llx %-13s %016llx\n",
2867                "excp_from:", save->last_excp_from,
2868                "excp_to:", save->last_excp_to);
2869 }
2870
2871 static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
2872 {
2873         struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
2874
2875         *info1 = control->exit_info_1;
2876         *info2 = control->exit_info_2;
2877 }
2878
2879 static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
2880 {
2881         struct vcpu_svm *svm = to_svm(vcpu);
2882         struct kvm_run *kvm_run = vcpu->run;
2883         u32 exit_code = svm->vmcb->control.exit_code;
2884
2885         trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
2886
2887         if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
2888                 vcpu->arch.cr0 = svm->vmcb->save.cr0;
2889         if (npt_enabled)
2890                 vcpu->arch.cr3 = svm->vmcb->save.cr3;
2891
2892         if (is_guest_mode(vcpu)) {
2893                 int vmexit;
2894
2895                 trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
2896                                         svm->vmcb->control.exit_info_1,
2897                                         svm->vmcb->control.exit_info_2,
2898                                         svm->vmcb->control.exit_int_info,
2899                                         svm->vmcb->control.exit_int_info_err,
2900                                         KVM_ISA_SVM);
2901
2902                 vmexit = nested_svm_exit_special(svm);
2903
2904                 if (vmexit == NESTED_EXIT_CONTINUE)
2905                         vmexit = nested_svm_exit_handled(svm);
2906
2907                 if (vmexit == NESTED_EXIT_DONE)
2908                         return 1;
2909         }
2910
2911         svm_complete_interrupts(svm);
2912
2913         if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
2914                 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
2915                 kvm_run->fail_entry.hardware_entry_failure_reason
2916                         = svm->vmcb->control.exit_code;
2917                 dump_vmcb(vcpu);
2918                 return 0;
2919         }
2920
2921         if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
2922             exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
2923             exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
2924             exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
2925                 printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
2926                        "exit_code 0x%x\n",
2927                        __func__, svm->vmcb->control.exit_int_info,
2928                        exit_code);
2929
2930         if (exit_fastpath != EXIT_FASTPATH_NONE)
2931                 return 1;
2932
2933         if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
2934             || !svm_exit_handlers[exit_code]) {
2935                 vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
2936                 dump_vmcb(vcpu);
2937                 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
2938                 vcpu->run->internal.suberror =
2939                         KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
2940                 vcpu->run->internal.ndata = 1;
2941                 vcpu->run->internal.data[0] = exit_code;
2942                 return 0;
2943         }
2944
2945 #ifdef CONFIG_RETPOLINE
2946         if (exit_code == SVM_EXIT_MSR)
2947                 return msr_interception(svm);
2948         else if (exit_code == SVM_EXIT_VINTR)
2949                 return interrupt_window_interception(svm);
2950         else if (exit_code == SVM_EXIT_INTR)
2951                 return intr_interception(svm);
2952         else if (exit_code == SVM_EXIT_HLT)
2953                 return halt_interception(svm);
2954         else if (exit_code == SVM_EXIT_NPF)
2955                 return npf_interception(svm);
2956 #endif
2957         return svm_exit_handlers[exit_code](svm);
2958 }
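
/*
 * The CONFIG_RETPOLINE block above special-cases the hottest exit
 * reasons with direct calls so they do not go through the
 * retpoline-protected indirect call into svm_exit_handlers[], which
 * is noticeably slower when retpolines are enabled.
 */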
2959
2960 static void reload_tss(struct kvm_vcpu *vcpu)
2961 {
2962         int cpu = raw_smp_processor_id();
2963
2964         struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
2965         sd->tss_desc->type = 9; /* available 32/64-bit TSS */
2966         load_TR_desc();
2967 }
2968
2969 static void pre_svm_run(struct vcpu_svm *svm)
2970 {
2971         int cpu = raw_smp_processor_id();
2972
2973         struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
2974
2975         if (sev_guest(svm->vcpu.kvm))
2976                 return pre_sev_run(svm, cpu);
2977
2978         /* FIXME: handle wraparound of asid_generation */
2979         if (svm->asid_generation != sd->asid_generation)
2980                 new_asid(svm, sd);
2981 }
2982
2983 static void svm_inject_nmi(struct kvm_vcpu *vcpu)
2984 {
2985         struct vcpu_svm *svm = to_svm(vcpu);
2986
2987         svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
2988         vcpu->arch.hflags |= HF_NMI_MASK;
2989         set_intercept(svm, INTERCEPT_IRET);
2990         ++vcpu->stat.nmi_injections;
2991 }
2992
2993 static void svm_set_irq(struct kvm_vcpu *vcpu)
2994 {
2995         struct vcpu_svm *svm = to_svm(vcpu);
2996
2997         BUG_ON(!(gif_set(svm)));
2998
2999         trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
3000         ++vcpu->stat.irq_injections;
3001
3002         svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
3003                 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
3004 }
3005
3006 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
3007 {
3008         struct vcpu_svm *svm = to_svm(vcpu);
3009
3010         if (svm_nested_virtualize_tpr(vcpu))
3011                 return;
3012
3013         clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
3014
3015         if (irr == -1)
3016                 return;
3017
3018         if (tpr >= irr)
3019                 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
3020 }
3021
3022 bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
3023 {
3024         struct vcpu_svm *svm = to_svm(vcpu);
3025         struct vmcb *vmcb = svm->vmcb;
3026         bool ret;
3027
3028         if (!gif_set(svm))
3029                 return true;
3030
3031         if (is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
3032                 return false;
3033
3034         ret = (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
3035               (svm->vcpu.arch.hflags & HF_NMI_MASK);
3036
3037         return ret;
3038 }
3039
3040 static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
3041 {
3042         struct vcpu_svm *svm = to_svm(vcpu);
3043         if (svm->nested.nested_run_pending)
3044                 return -EBUSY;
3045
3046         /* An NMI must not be injected into L2 if it's supposed to VM-Exit.  */
3047         if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
3048                 return -EBUSY;
3049
3050         return !svm_nmi_blocked(vcpu);
3051 }
3052
3053 static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
3054 {
3055         struct vcpu_svm *svm = to_svm(vcpu);
3056
3057         return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
3058 }
3059
3060 static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
3061 {
3062         struct vcpu_svm *svm = to_svm(vcpu);
3063
3064         if (masked) {
3065                 svm->vcpu.arch.hflags |= HF_NMI_MASK;
3066                 set_intercept(svm, INTERCEPT_IRET);
3067         } else {
3068                 svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
3069                 clr_intercept(svm, INTERCEPT_IRET);
3070         }
3071 }
3072
3073 bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
3074 {
3075         struct vcpu_svm *svm = to_svm(vcpu);
3076         struct vmcb *vmcb = svm->vmcb;
3077
3078         if (!gif_set(svm))
3079                 return true;
3080
3081         if (is_guest_mode(vcpu)) {
3082                 /* As long as interrupts are being delivered...  */
3083                 if ((svm->vcpu.arch.hflags & HF_VINTR_MASK)
3084                     ? !(svm->vcpu.arch.hflags & HF_HIF_MASK)
3085                     : !(kvm_get_rflags(vcpu) & X86_EFLAGS_IF))
3086                         return true;
3087
3088                 /* ... vmexits aren't blocked by the interrupt shadow  */
3089                 if (nested_exit_on_intr(svm))
3090                         return false;
3091         } else {
3092                 if (!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF))
3093                         return true;
3094         }
3095
3096         return (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK);
3097 }
3098
3099 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
3100 {
3101         struct vcpu_svm *svm = to_svm(vcpu);
3102         if (svm->nested.nested_run_pending)
3103                 return -EBUSY;
3104
3105         /*
3106          * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
3107          * e.g. if the IRQ arrived asynchronously after checking nested events.
3108          */
3109         if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(svm))
3110                 return -EBUSY;
3111
3112         return !svm_interrupt_blocked(vcpu);
3113 }
3114
3115 static void enable_irq_window(struct kvm_vcpu *vcpu)
3116 {
3117         struct vcpu_svm *svm = to_svm(vcpu);
3118
3119         /*
3120          * If GIF=0 we can't rely on the CPU to tell us when GIF becomes
3121          * 1, because that's a separate STGI/VMRUN intercept.  The next time we
3122          * get that intercept, this function will be called again and we'll
3123          * get the VINTR intercept then. However, if the vGIF feature is
3124          * enabled, the STGI interception will not occur. Enable the IRQ
3125          * window under the assumption that the hardware will set the GIF.
3126          */
3127         if (vgif_enabled(svm) || gif_set(svm)) {
3128                 /*
3129                  * An IRQ window is not needed when AVIC is enabled,
3130                  * unless we have a pending ExtINT, since ExtINT cannot be
3131                  * injected via AVIC. In that case, temporarily disable AVIC
3132                  * and fall back to injecting the IRQ via V_IRQ.
3133                  */
3134                 svm_toggle_avic_for_irq_window(vcpu, false);
3135                 svm_set_vintr(svm);
3136         }
3137 }
3138
3139 static void enable_nmi_window(struct kvm_vcpu *vcpu)
3140 {
3141         struct vcpu_svm *svm = to_svm(vcpu);
3142
3143         if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
3144             == HF_NMI_MASK)
3145                 return; /* IRET will cause a vm exit */
3146
3147         if (!gif_set(svm)) {
3148                 if (vgif_enabled(svm))
3149                         set_intercept(svm, INTERCEPT_STGI);
3150                 return; /* STGI will cause a vm exit */
3151         }
3152
3153         /*
3154          * Something prevents NMI from being injected. Single step over the
3155          * possible problem (IRET or exception injection or interrupt shadow).
3156          */
3157         svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu);
3158         svm->nmi_singlestep = true;
3159         svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
3160 }
3161
3162 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
3163 {
3164         return 0;
3165 }
3166
3167 static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
3168 {
3169         return 0;
3170 }
3171
3172 void svm_flush_tlb(struct kvm_vcpu *vcpu)
3173 {
3174         struct vcpu_svm *svm = to_svm(vcpu);
3175
3176         /*
3177          * Flush only the current ASID even if the TLB flush was invoked via
3178          * kvm_flush_remote_tlbs().  Although flushing remote TLBs requires all
3179          * ASIDs to be flushed, KVM uses a single ASID for L1 and L2, and
3180          * unconditionally does a TLB flush on both nested VM-Enter and nested
3181          * VM-Exit (via kvm_mmu_reset_context()).
3182          */
3183         if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
3184                 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
3185         else
3186                 svm->asid_generation--;
3187 }
3188
3189 static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
3190 {
3191         struct vcpu_svm *svm = to_svm(vcpu);
3192
3193         invlpga(gva, svm->vmcb->control.asid);
3194 }
3195
3196 static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
3197 {
3198 }
3199
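/*
 * The two helpers below keep the in-kernel APIC's TPR and the VMCB V_TPR
 * field (the low four bits of int_ctl) in sync, on the assumption that
 * V_TPR holds the same 4-bit value as CR8, i.e. TPR bits 7:4.
 */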
3200 static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
3201 {
3202         struct vcpu_svm *svm = to_svm(vcpu);
3203
3204         if (svm_nested_virtualize_tpr(vcpu))
3205                 return;
3206
3207         if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
3208                 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
3209                 kvm_set_cr8(vcpu, cr8);
3210         }
3211 }
3212
3213 static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
3214 {
3215         struct vcpu_svm *svm = to_svm(vcpu);
3216         u64 cr8;
3217
3218         if (svm_nested_virtualize_tpr(vcpu) ||
3219             kvm_vcpu_apicv_active(vcpu))
3220                 return;
3221
3222         cr8 = kvm_get_cr8(vcpu);
3223         svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
3224         svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
3225 }
3226
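/*
 * EXITINTINFO mirrors the EVENTINJ layout and describes an event that was
 * being delivered when the #VMEXIT occurred; svm_complete_interrupts()
 * requeues such an event so it can be reinjected on the next entry.
 */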
3227 static void svm_complete_interrupts(struct vcpu_svm *svm)
3228 {
3229         u8 vector;
3230         int type;
3231         u32 exitintinfo = svm->vmcb->control.exit_int_info;
3232         unsigned int3_injected = svm->int3_injected;
3233
3234         svm->int3_injected = 0;
3235
3236         /*
3237          * If we've made progress since setting HF_IRET_MASK, we've
3238          * executed an IRET and can allow NMI injection.
3239          */
3240         if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
3241             && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
3242                 svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
3243                 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3244         }
3245
3246         svm->vcpu.arch.nmi_injected = false;
3247         kvm_clear_exception_queue(&svm->vcpu);
3248         kvm_clear_interrupt_queue(&svm->vcpu);
3249
3250         if (!(exitintinfo & SVM_EXITINTINFO_VALID))
3251                 return;
3252
3253         kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3254
3255         vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
3256         type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
3257
3258         switch (type) {
3259         case SVM_EXITINTINFO_TYPE_NMI:
3260                 svm->vcpu.arch.nmi_injected = true;
3261                 break;
3262         case SVM_EXITINTINFO_TYPE_EXEPT:
3263                 /*
3264                  * In case of software exceptions, do not reinject the vector,
3265                  * but re-execute the instruction instead. Rewind RIP first
3266                  * if we emulated INT3 before.
3267                  */
3268                 if (kvm_exception_is_soft(vector)) {
3269                         if (vector == BP_VECTOR && int3_injected &&
3270                             kvm_is_linear_rip(&svm->vcpu, svm->int3_rip))
3271                                 kvm_rip_write(&svm->vcpu,
3272                                               kvm_rip_read(&svm->vcpu) -
3273                                               int3_injected);
3274                         break;
3275                 }
3276                 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
3277                         u32 err = svm->vmcb->control.exit_int_info_err;
3278                         kvm_requeue_exception_e(&svm->vcpu, vector, err);
3279
3280                 } else
3281                         kvm_requeue_exception(&svm->vcpu, vector);
3282                 break;
3283         case SVM_EXITINTINFO_TYPE_INTR:
3284                 kvm_queue_interrupt(&svm->vcpu, vector, false);
3285                 break;
3286         default:
3287                 break;
3288         }
3289 }
3290
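/*
 * Called when an almost-completed VM entry is aborted: the event prepared
 * in EVENTINJ is stashed in exit_int_info so that svm_complete_interrupts()
 * can requeue it for the next entry.
 */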
3291 static void svm_cancel_injection(struct kvm_vcpu *vcpu)
3292 {
3293         struct vcpu_svm *svm = to_svm(vcpu);
3294         struct vmcb_control_area *control = &svm->vmcb->control;
3295
3296         control->exit_int_info = control->event_inj;
3297         control->exit_int_info_err = control->event_inj_err;
3298         control->event_inj = 0;
3299         svm_complete_interrupts(svm);
3300 }
3301
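/*
 * Fast path for WRMSR exits from L1.  For SVM_EXIT_MSR, exit_info_1 is 0
 * for RDMSR and 1 for WRMSR, so only writes take the irqoff fast path.
 */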
3302 static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
3303 {
3304         if (!is_guest_mode(vcpu) &&
3305             to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
3306             to_svm(vcpu)->vmcb->control.exit_info_1)
3307                 return handle_fastpath_set_msr_irqoff(vcpu);
3308
3309         return EXIT_FASTPATH_NONE;
3310 }
3311
3312 void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
3313
3314 static fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
3315 {
3316         fastpath_t exit_fastpath;
3317         struct vcpu_svm *svm = to_svm(vcpu);
3318
3319         svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
3320         svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
3321         svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
3322
3323         /*
3324          * Disable singlestep if we're injecting an interrupt/exception.
3325          * We don't want our modified rflags to be pushed on the stack where
3326          * we might not be able to easily reset them when we later disable
3327          * NMI singlestep.
3328          */
3329         if (svm->nmi_singlestep && svm->vmcb->control.event_inj) {
3330                 /*
3331                  * Event injection happens before external interrupts cause a
3332                  * vmexit and interrupts are disabled here, so smp_send_reschedule
3333                  * is enough to force an immediate vmexit.
3334                  */
3335                 disable_nmi_singlestep(svm);
3336                 smp_send_reschedule(vcpu->cpu);
3337         }
3338
3339         pre_svm_run(svm);
3340
3341         sync_lapic_to_cr8(vcpu);
3342
3343         svm->vmcb->save.cr2 = vcpu->arch.cr2;
3344
3345         /*
3346          * Run with all DR6 cause bits clear unless needed, so that we can get
3347          * the exact cause of a #DB.
3348          */
3349         if (unlikely(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
3350                 svm_set_dr6(svm, vcpu->arch.dr6);
3351         else
3352                 svm_set_dr6(svm, DR6_FIXED_1 | DR6_RTM);
3353
3354         clgi();
3355         kvm_load_guest_xsave_state(vcpu);
3356
3357         if (lapic_in_kernel(vcpu) &&
3358                 vcpu->arch.apic->lapic_timer.timer_advance_ns)
3359                 kvm_wait_lapic_expire(vcpu);
3360
3361         /*
3362          * If this vCPU has touched SPEC_CTRL, restore the guest's value if
3363          * it's non-zero. Since vmentry is serialising on affected CPUs, there
3364          * is no need to worry about the conditional branch over the wrmsr
3365          * being speculatively taken.
3366          */
3367         x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
3368
3369         __svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs);
3370
3371 #ifdef CONFIG_X86_64
3372         wrmsrl(MSR_GS_BASE, svm->host.gs_base);
3373 #else
3374         loadsegment(fs, svm->host.fs);
3375 #ifndef CONFIG_X86_32_LAZY_GS
3376         loadsegment(gs, svm->host.gs);
3377 #endif
3378 #endif
3379
3380         /*
3381          * We do not use IBRS in the kernel. If this vCPU has used the
3382          * SPEC_CTRL MSR it may have left it on; save the value and
3383          * turn it off. This is much more efficient than blindly adding
3384          * it to the atomic save/restore list. Especially as the former
3385          * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
3386          *
3387          * For non-nested case:
3388          * If the L01 MSR bitmap does not intercept the MSR, then we need to
3389          * save it.
3390          *
3391          * For nested case:
3392          * If the L02 MSR bitmap does not intercept the MSR, then we need to
3393          * save it.
3394          */
3395         if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
3396                 svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
3397
3398         reload_tss(vcpu);
3399
3400         x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
3401
3402         vcpu->arch.cr2 = svm->vmcb->save.cr2;
3403         vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
3404         vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
3405         vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
3406
3407         if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
3408                 kvm_before_interrupt(&svm->vcpu);
3409
3410         kvm_load_host_xsave_state(vcpu);
3411         stgi();
3412
3413         /* Any pending NMI will happen here */
3414         exit_fastpath = svm_exit_handlers_fastpath(vcpu);
3415
3416         if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
3417                 kvm_after_interrupt(&svm->vcpu);
3418
3419         sync_cr8_to_lapic(vcpu);
3420
3421         svm->next_rip = 0;
3422         svm->nested.nested_run_pending = 0;
3423
3424         svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
3425
3426         /* If the exit was due to a #PF, check for async PF. */
3427         if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
3428                 svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
3429
3430         if (npt_enabled) {
3431                 vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
3432                 vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
3433         }
3434
3435         /*
3436          * We need to handle MC intercepts here before the vcpu has a chance to
3437          * change the physical CPU.
3438          */
3439         if (unlikely(svm->vmcb->control.exit_code ==
3440                      SVM_EXIT_EXCP_BASE + MC_VECTOR))
3441                 svm_handle_mce(svm);
3442
3443         mark_all_clean(svm->vmcb);
3444         return exit_fastpath;
3445 }
3446
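/*
 * With NPT enabled the new root is written to nested_cr3, and the guest's
 * own CR3 is copied to save.cr3 whenever it is up to date (VCPU_EXREG_CR3
 * available); __sme_set() tags the root with the SME encryption bit when
 * memory encryption is active.
 */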
3447 static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root)
3448 {
3449         struct vcpu_svm *svm = to_svm(vcpu);
3450         unsigned long cr3;
3451
3452         cr3 = __sme_set(root);
3453         if (npt_enabled) {
3454                 svm->vmcb->control.nested_cr3 = cr3;
3455                 mark_dirty(svm->vmcb, VMCB_NPT);
3456
3457                 /* Loading L2's CR3 is handled by enter_svm_guest_mode.  */
3458                 if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
3459                         return;
3460                 cr3 = vcpu->arch.cr3;
3461         }
3462
3463         svm->vmcb->save.cr3 = cr3;
3464         mark_dirty(svm->vmcb, VMCB_CR);
3465 }
3466
3467 static int is_disabled(void)
3468 {
3469         u64 vm_cr;
3470
3471         rdmsrl(MSR_VM_CR, vm_cr);
3472         if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
3473                 return 1;
3474
3475         return 0;
3476 }
3477
3478 static void
3479 svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
3480 {
3481         /*
3482          * Patch in the VMMCALL instruction:
3483          */
3484         hypercall[0] = 0x0f;
3485         hypercall[1] = 0x01;
3486         hypercall[2] = 0xd9;
3487 }
3488
3489 static int __init svm_check_processor_compat(void)
3490 {
3491         return 0;
3492 }
3493
3494 static bool svm_cpu_has_accelerated_tpr(void)
3495 {
3496         return false;
3497 }
3498
3499 static bool svm_has_emulated_msr(u32 index)
3500 {
3501         switch (index) {
3502         case MSR_IA32_MCG_EXT_CTL:
3503         case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
3504                 return false;
3505         default:
3506                 break;
3507         }
3508
3509         return true;
3510 }
3511
3512 static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
3513 {
3514         return 0;
3515 }
3516
3517 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
3518 {
3519         struct vcpu_svm *svm = to_svm(vcpu);
3520
3521         vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
3522                                     boot_cpu_has(X86_FEATURE_XSAVE) &&
3523                                     boot_cpu_has(X86_FEATURE_XSAVES);
3524
3525         /* Update nrips enabled cache */
3526         svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
3527                              guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS);
3528
3529         if (!kvm_vcpu_apicv_active(vcpu))
3530                 return;
3531
3532         /*
3533          * AVIC does not work with an x2APIC mode guest. If the X2APIC feature
3534          * is exposed to the guest, disable AVIC.
3535          */
3536         if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC))
3537                 kvm_request_apicv_update(vcpu->kvm, false,
3538                                          APICV_INHIBIT_REASON_X2APIC);
3539
3540         /*
3541          * Currently, AVIC does not work with nested virtualization.
3542          * So, we disable AVIC when cpuid for SVM is set in the L1 guest.
3543          */
3544         if (nested && guest_cpuid_has(vcpu, X86_FEATURE_SVM))
3545                 kvm_request_apicv_update(vcpu->kvm, false,
3546                                          APICV_INHIBIT_REASON_NESTED);
3547 }
3548
3549 static bool svm_has_wbinvd_exit(void)
3550 {
3551         return true;
3552 }
3553
3554 #define PRE_EX(exit)  { .exit_code = (exit), \
3555                         .stage = X86_ICPT_PRE_EXCEPT, }
3556 #define POST_EX(exit) { .exit_code = (exit), \
3557                         .stage = X86_ICPT_POST_EXCEPT, }
3558 #define POST_MEM(exit) { .exit_code = (exit), \
3559                         .stage = X86_ICPT_POST_MEMACCESS, }
3560
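/*
 * Map from emulator intercept IDs to SVM exit codes, together with the
 * emulation stage at which the nested intercept check runs: before the
 * exception checks (PRE_EX), after them (POST_EX), or after the memory
 * access (POST_MEM).
 */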
3561 static const struct __x86_intercept {
3562         u32 exit_code;
3563         enum x86_intercept_stage stage;
3564 } x86_intercept_map[] = {
3565         [x86_intercept_cr_read]         = POST_EX(SVM_EXIT_READ_CR0),
3566         [x86_intercept_cr_write]        = POST_EX(SVM_EXIT_WRITE_CR0),
3567         [x86_intercept_clts]            = POST_EX(SVM_EXIT_WRITE_CR0),
3568         [x86_intercept_lmsw]            = POST_EX(SVM_EXIT_WRITE_CR0),
3569         [x86_intercept_smsw]            = POST_EX(SVM_EXIT_READ_CR0),
3570         [x86_intercept_dr_read]         = POST_EX(SVM_EXIT_READ_DR0),
3571         [x86_intercept_dr_write]        = POST_EX(SVM_EXIT_WRITE_DR0),
3572         [x86_intercept_sldt]            = POST_EX(SVM_EXIT_LDTR_READ),
3573         [x86_intercept_str]             = POST_EX(SVM_EXIT_TR_READ),
3574         [x86_intercept_lldt]            = POST_EX(SVM_EXIT_LDTR_WRITE),
3575         [x86_intercept_ltr]             = POST_EX(SVM_EXIT_TR_WRITE),
3576         [x86_intercept_sgdt]            = POST_EX(SVM_EXIT_GDTR_READ),
3577         [x86_intercept_sidt]            = POST_EX(SVM_EXIT_IDTR_READ),
3578         [x86_intercept_lgdt]            = POST_EX(SVM_EXIT_GDTR_WRITE),
3579         [x86_intercept_lidt]            = POST_EX(SVM_EXIT_IDTR_WRITE),
3580         [x86_intercept_vmrun]           = POST_EX(SVM_EXIT_VMRUN),
3581         [x86_intercept_vmmcall]         = POST_EX(SVM_EXIT_VMMCALL),
3582         [x86_intercept_vmload]          = POST_EX(SVM_EXIT_VMLOAD),
3583         [x86_intercept_vmsave]          = POST_EX(SVM_EXIT_VMSAVE),
3584         [x86_intercept_stgi]            = POST_EX(SVM_EXIT_STGI),
3585         [x86_intercept_clgi]            = POST_EX(SVM_EXIT_CLGI),
3586         [x86_intercept_skinit]          = POST_EX(SVM_EXIT_SKINIT),
3587         [x86_intercept_invlpga]         = POST_EX(SVM_EXIT_INVLPGA),
3588         [x86_intercept_rdtscp]          = POST_EX(SVM_EXIT_RDTSCP),
3589         [x86_intercept_monitor]         = POST_MEM(SVM_EXIT_MONITOR),
3590         [x86_intercept_mwait]           = POST_EX(SVM_EXIT_MWAIT),
3591         [x86_intercept_invlpg]          = POST_EX(SVM_EXIT_INVLPG),
3592         [x86_intercept_invd]            = POST_EX(SVM_EXIT_INVD),
3593         [x86_intercept_wbinvd]          = POST_EX(SVM_EXIT_WBINVD),
3594         [x86_intercept_wrmsr]           = POST_EX(SVM_EXIT_MSR),
3595         [x86_intercept_rdtsc]           = POST_EX(SVM_EXIT_RDTSC),
3596         [x86_intercept_rdmsr]           = POST_EX(SVM_EXIT_MSR),
3597         [x86_intercept_rdpmc]           = POST_EX(SVM_EXIT_RDPMC),
3598         [x86_intercept_cpuid]           = PRE_EX(SVM_EXIT_CPUID),
3599         [x86_intercept_rsm]             = PRE_EX(SVM_EXIT_RSM),
3600         [x86_intercept_pause]           = PRE_EX(SVM_EXIT_PAUSE),
3601         [x86_intercept_pushf]           = PRE_EX(SVM_EXIT_PUSHF),
3602         [x86_intercept_popf]            = PRE_EX(SVM_EXIT_POPF),
3603         [x86_intercept_intn]            = PRE_EX(SVM_EXIT_SWINT),
3604         [x86_intercept_iret]            = PRE_EX(SVM_EXIT_IRET),
3605         [x86_intercept_icebp]           = PRE_EX(SVM_EXIT_ICEBP),
3606         [x86_intercept_hlt]             = POST_EX(SVM_EXIT_HLT),
3607         [x86_intercept_in]              = POST_EX(SVM_EXIT_IOIO),
3608         [x86_intercept_ins]             = POST_EX(SVM_EXIT_IOIO),
3609         [x86_intercept_out]             = POST_EX(SVM_EXIT_IOIO),
3610         [x86_intercept_outs]            = POST_EX(SVM_EXIT_IOIO),
3611         [x86_intercept_xsetbv]          = PRE_EX(SVM_EXIT_XSETBV),
3612 };
3613
3614 #undef PRE_EX
3615 #undef POST_EX
3616 #undef POST_MEM
3617
3618 static int svm_check_intercept(struct kvm_vcpu *vcpu,
3619                                struct x86_instruction_info *info,
3620                                enum x86_intercept_stage stage,
3621                                struct x86_exception *exception)
3622 {
3623         struct vcpu_svm *svm = to_svm(vcpu);
3624         int vmexit, ret = X86EMUL_CONTINUE;
3625         struct __x86_intercept icpt_info;
3626         struct vmcb *vmcb = svm->vmcb;
3627
3628         if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
3629                 goto out;
3630
3631         icpt_info = x86_intercept_map[info->intercept];
3632
3633         if (stage != icpt_info.stage)
3634                 goto out;
3635
3636         switch (icpt_info.exit_code) {
3637         case SVM_EXIT_READ_CR0:
3638                 if (info->intercept == x86_intercept_cr_read)
3639                         icpt_info.exit_code += info->modrm_reg;
3640                 break;
3641         case SVM_EXIT_WRITE_CR0: {
3642                 unsigned long cr0, val;
3643                 u64 intercept;
3644
3645                 if (info->intercept == x86_intercept_cr_write)
3646                         icpt_info.exit_code += info->modrm_reg;
3647
3648                 if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 ||
3649                     info->intercept == x86_intercept_clts)
3650                         break;
3651
3652                 intercept = svm->nested.intercept;
3653
3654                 if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
3655                         break;
3656
3657                 cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
3658                 val = info->src_val  & ~SVM_CR0_SELECTIVE_MASK;
3659
3660                 if (info->intercept == x86_intercept_lmsw) {
3661                         cr0 &= 0xfUL;
3662                         val &= 0xfUL;
3663                         /* lmsw can't clear PE - catch this here */
3664                         if (cr0 & X86_CR0_PE)
3665                                 val |= X86_CR0_PE;
3666                 }
3667
3668                 if (cr0 ^ val)
3669                         icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
3670
3671                 break;
3672         }
3673         case SVM_EXIT_READ_DR0:
3674         case SVM_EXIT_WRITE_DR0:
3675                 icpt_info.exit_code += info->modrm_reg;
3676                 break;
3677         case SVM_EXIT_MSR:
3678                 if (info->intercept == x86_intercept_wrmsr)
3679                         vmcb->control.exit_info_1 = 1;
3680                 else
3681                         vmcb->control.exit_info_1 = 0;
3682                 break;
3683         case SVM_EXIT_PAUSE:
3684                 /*
3685                  * We get this intercept for NOP only, but PAUSE is
3686                  * REP NOP, so check for the REP prefix here.
3687                  */
3688                 if (info->rep_prefix != REPE_PREFIX)
3689                         goto out;
3690                 break;
3691         case SVM_EXIT_IOIO: {
3692                 u64 exit_info;
3693                 u32 bytes;
3694
3695                 if (info->intercept == x86_intercept_in ||
3696                     info->intercept == x86_intercept_ins) {
3697                         exit_info = ((info->src_val & 0xffff) << 16) |
3698                                 SVM_IOIO_TYPE_MASK;
3699                         bytes = info->dst_bytes;
3700                 } else {
3701                         exit_info = (info->dst_val & 0xffff) << 16;
3702                         bytes = info->src_bytes;
3703                 }
3704
3705                 if (info->intercept == x86_intercept_outs ||
3706                     info->intercept == x86_intercept_ins)
3707                         exit_info |= SVM_IOIO_STR_MASK;
3708
3709                 if (info->rep_prefix)
3710                         exit_info |= SVM_IOIO_REP_MASK;
3711
3712                 bytes = min(bytes, 4u);
3713
3714                 exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;
3715
3716                 exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);
3717
3718                 vmcb->control.exit_info_1 = exit_info;
3719                 vmcb->control.exit_info_2 = info->next_rip;
3720
3721                 break;
3722         }
3723         default:
3724                 break;
3725         }
3726
3727         /* TODO: Advertise NRIPS to guest hypervisor unconditionally */
3728         if (static_cpu_has(X86_FEATURE_NRIPS))
3729                 vmcb->control.next_rip  = info->next_rip;
3730         vmcb->control.exit_code = icpt_info.exit_code;
3731         vmexit = nested_svm_exit_handled(svm);
3732
3733         ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
3734                                            : X86EMUL_CONTINUE;
3735
3736 out:
3737         return ret;
3738 }
3739
3740 static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
3741 {
3742 }
3743
3744 static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
3745 {
3746         if (pause_filter_thresh)
3747                 shrink_ple_window(vcpu);
3748 }
3749
3750 static void svm_setup_mce(struct kvm_vcpu *vcpu)
3751 {
3752         /* [63:9] are reserved. */
3753         vcpu->arch.mcg_cap &= 0x1ff;
3754 }
3755
3756 bool svm_smi_blocked(struct kvm_vcpu *vcpu)
3757 {
3758         struct vcpu_svm *svm = to_svm(vcpu);
3759
3760         /* Per APM Vol.2 15.22.2 "Response to SMI" */
3761         if (!gif_set(svm))
3762                 return true;
3763
3764         return is_smm(vcpu);
3765 }
3766
3767 static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
3768 {
3769         struct vcpu_svm *svm = to_svm(vcpu);
3770         if (svm->nested.nested_run_pending)
3771                 return -EBUSY;
3772
3773         /* An SMI must not be injected into L2 if it's supposed to VM-Exit.  */
3774         if (for_injection && is_guest_mode(vcpu) && nested_exit_on_smi(svm))
3775                 return -EBUSY;
3776
3777         return !svm_smi_blocked(vcpu);
3778 }
3779
3780 static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
3781 {
3782         struct vcpu_svm *svm = to_svm(vcpu);
3783         int ret;
3784
3785         if (is_guest_mode(vcpu)) {
3786                 /* FED8h - SVM Guest */
3787                 put_smstate(u64, smstate, 0x7ed8, 1);
3788                 /* FEE0h - SVM Guest VMCB Physical Address */
3789                 put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb);
3790
3791                 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
3792                 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
3793                 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
3794
3795                 ret = nested_svm_vmexit(svm);
3796                 if (ret)
3797                         return ret;
3798         }
3799         return 0;
3800 }
3801
3802 static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
3803 {
3804         struct vcpu_svm *svm = to_svm(vcpu);
3805         struct vmcb *nested_vmcb;
3806         struct kvm_host_map map;
3807         u64 guest;
3808         u64 vmcb;
3809
3810         guest = GET_SMSTATE(u64, smstate, 0x7ed8);
3811         vmcb = GET_SMSTATE(u64, smstate, 0x7ee0);
3812
3813         if (guest) {
3814                 if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL)
3815                         return 1;
3816                 nested_vmcb = map.hva;
3817                 enter_svm_guest_mode(svm, vmcb, nested_vmcb, &map);
3818         }
3819         return 0;
3820 }
3821
3822 static void enable_smi_window(struct kvm_vcpu *vcpu)
3823 {
3824         struct vcpu_svm *svm = to_svm(vcpu);
3825
3826         if (!gif_set(svm)) {
3827                 if (vgif_enabled(svm))
3828                         set_intercept(svm, INTERCEPT_STGI);
3829                 /* STGI will cause a vm exit */
3830         } else {
3831                 /* We must be in SMM; RSM will cause a vmexit anyway.  */
3832         }
3833 }
3834
3835 static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
3836 {
3837         unsigned long cr4 = kvm_read_cr4(vcpu);
3838         bool smep = cr4 & X86_CR4_SMEP;
3839         bool smap = cr4 & X86_CR4_SMAP;
3840         bool is_user = svm_get_cpl(vcpu) == 3;
3841
3842         /*
3843          * If RIP is invalid, go ahead with emulation which will cause an
3844          * internal error exit.
3845          */
3846         if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT))
3847                 return true;
3848
3849         /*
3850          * Detect and work around Erratum 1096 (Fam_17h_00_0Fh).
3851          *
3852          * Erratum:
3853          * When the CPU raises #NPF on a guest data access and vCPU CR4.SMAP=1,
3854          * it is possible that CPU microcode implementing DecodeAssist will fail
3855          * to read the bytes of the instruction which caused the #NPF. In this
3856          * case, the GuestIntrBytes field of the VMCB on a VMEXIT will incorrectly
3857          * return 0 instead of the correct guest instruction bytes.
3858          *
3859          * This happens because the CPU microcode that reads the instruction
3860          * bytes uses a special opcode which attempts to read data with CPL=0
3861          * privileges. The microcode reads CS:RIP and, if it hits an SMAP
3862          * fault, gives up and returns no instruction bytes.
3863          *
3864          * Detection:
3865          * We reach here when the CPU supports DecodeAssist, raised #NPF and
3866          * returned 0 in the GuestIntrBytes field of the VMCB.
3867          * First, the erratum can only be triggered when vCPU CR4.SMAP=1.
3868          * Second, if vCPU CR4.SMEP=1, the erratum can only be triggered
3869          * when vCPU CPL==3 (because otherwise the guest would have triggered
3870          * an SMEP fault instead of #NPF).
3871          * Otherwise, with vCPU CR4.SMEP=0, the erratum can be triggered at any
3872          * vCPU CPL. As most guests enable SMAP if they have also enabled SMEP,
3873          * use the above logic to minimize false positives when detecting the
3874          * erratum while still preserving correctness in all cases.
3875          *
3876          * Workaround:
3877          * To determine what instruction the guest was executing, the hypervisor
3878          * will have to decode the instruction at the instruction pointer.
3879          *
3880          * In a non-SEV guest, the hypervisor can read guest memory to decode
3881          * the instruction at the instruction pointer when insn_len is zero,
3882          * so we return true to indicate that decoding is possible.
3883          *
3884          * But in an SEV guest, guest memory is encrypted with a guest-specific
3885          * key and the hypervisor cannot decode the instruction, so we cannot
3886          * work around the erratum. Print the error and request to kill the
3887          * guest.
3888          */
3889         if (smap && (!smep || is_user)) {
3890                 if (!sev_guest(vcpu->kvm))
3891                         return true;
3892
3893                 pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n");
3894                 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
3895         }
3896
3897         return false;
3898 }
3899
3900 static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
3901 {
3902         struct vcpu_svm *svm = to_svm(vcpu);
3903
3904         /*
3905          * TODO: The last condition latches INIT signals on the vCPU when the
3906          * vCPU is in guest mode and vmcb12 defines an intercept on INIT.
3907          * To properly emulate the INIT intercept,
3908          * svm_check_nested_events() should call nested_svm_vmexit()
3909          * if an INIT signal is pending.
3910          */
3911         return !gif_set(svm) ||
3912                    (svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT));
3913 }
3914
3915 static void svm_vm_destroy(struct kvm *kvm)
3916 {
3917         avic_vm_destroy(kvm);
3918         sev_vm_destroy(kvm);
3919 }
3920
3921 static int svm_vm_init(struct kvm *kvm)
3922 {
3923         if (avic) {
3924                 int ret = avic_vm_init(kvm);
3925                 if (ret)
3926                         return ret;
3927         }
3928
3929         kvm_apicv_init(kvm, avic);
3930         return 0;
3931 }
3932
3933 static struct kvm_x86_ops svm_x86_ops __initdata = {
3934         .hardware_unsetup = svm_hardware_teardown,
3935         .hardware_enable = svm_hardware_enable,
3936         .hardware_disable = svm_hardware_disable,
3937         .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
3938         .has_emulated_msr = svm_has_emulated_msr,
3939
3940         .vcpu_create = svm_create_vcpu,
3941         .vcpu_free = svm_free_vcpu,
3942         .vcpu_reset = svm_vcpu_reset,
3943
3944         .vm_size = sizeof(struct kvm_svm),
3945         .vm_init = svm_vm_init,
3946         .vm_destroy = svm_vm_destroy,
3947
3948         .prepare_guest_switch = svm_prepare_guest_switch,
3949         .vcpu_load = svm_vcpu_load,
3950         .vcpu_put = svm_vcpu_put,
3951         .vcpu_blocking = svm_vcpu_blocking,
3952         .vcpu_unblocking = svm_vcpu_unblocking,
3953
3954         .update_bp_intercept = update_bp_intercept,
3955         .get_msr_feature = svm_get_msr_feature,
3956         .get_msr = svm_get_msr,
3957         .set_msr = svm_set_msr,
3958         .get_segment_base = svm_get_segment_base,
3959         .get_segment = svm_get_segment,
3960         .set_segment = svm_set_segment,
3961         .get_cpl = svm_get_cpl,
3962         .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
3963         .set_cr0 = svm_set_cr0,
3964         .set_cr4 = svm_set_cr4,
3965         .set_efer = svm_set_efer,
3966         .get_idt = svm_get_idt,
3967         .set_idt = svm_set_idt,
3968         .get_gdt = svm_get_gdt,
3969         .set_gdt = svm_set_gdt,
3970         .set_dr7 = svm_set_dr7,
3971         .sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
3972         .cache_reg = svm_cache_reg,
3973         .get_rflags = svm_get_rflags,
3974         .set_rflags = svm_set_rflags,
3975
3976         .tlb_flush_all = svm_flush_tlb,
3977         .tlb_flush_current = svm_flush_tlb,
3978         .tlb_flush_gva = svm_flush_tlb_gva,
3979         .tlb_flush_guest = svm_flush_tlb,
3980
3981         .run = svm_vcpu_run,
3982         .handle_exit = handle_exit,
3983         .skip_emulated_instruction = skip_emulated_instruction,
3984         .update_emulated_instruction = NULL,
3985         .set_interrupt_shadow = svm_set_interrupt_shadow,
3986         .get_interrupt_shadow = svm_get_interrupt_shadow,
3987         .patch_hypercall = svm_patch_hypercall,
3988         .set_irq = svm_set_irq,
3989         .set_nmi = svm_inject_nmi,
3990         .queue_exception = svm_queue_exception,
3991         .cancel_injection = svm_cancel_injection,
3992         .interrupt_allowed = svm_interrupt_allowed,
3993         .nmi_allowed = svm_nmi_allowed,
3994         .get_nmi_mask = svm_get_nmi_mask,
3995         .set_nmi_mask = svm_set_nmi_mask,
3996         .enable_nmi_window = enable_nmi_window,
3997         .enable_irq_window = enable_irq_window,
3998         .update_cr8_intercept = update_cr8_intercept,
3999         .set_virtual_apic_mode = svm_set_virtual_apic_mode,
4000         .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
4001         .check_apicv_inhibit_reasons = svm_check_apicv_inhibit_reasons,
4002         .pre_update_apicv_exec_ctrl = svm_pre_update_apicv_exec_ctrl,
4003         .load_eoi_exitmap = svm_load_eoi_exitmap,
4004         .hwapic_irr_update = svm_hwapic_irr_update,
4005         .hwapic_isr_update = svm_hwapic_isr_update,
4006         .sync_pir_to_irr = kvm_lapic_find_highest_irr,
4007         .apicv_post_state_restore = avic_post_state_restore,
4008
4009         .set_tss_addr = svm_set_tss_addr,
4010         .set_identity_map_addr = svm_set_identity_map_addr,
4011         .get_tdp_level = get_npt_level,
4012         .get_mt_mask = svm_get_mt_mask,
4013
4014         .get_exit_info = svm_get_exit_info,
4015
4016         .cpuid_update = svm_cpuid_update,
4017
4018         .has_wbinvd_exit = svm_has_wbinvd_exit,
4019
4020         .write_l1_tsc_offset = svm_write_l1_tsc_offset,
4021
4022         .load_mmu_pgd = svm_load_mmu_pgd,
4023
4024         .check_intercept = svm_check_intercept,
4025         .handle_exit_irqoff = svm_handle_exit_irqoff,
4026
4027         .request_immediate_exit = __kvm_request_immediate_exit,
4028
4029         .sched_in = svm_sched_in,
4030
4031         .pmu_ops = &amd_pmu_ops,
4032         .nested_ops = &svm_nested_ops,
4033
4034         .deliver_posted_interrupt = svm_deliver_avic_intr,
4035         .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt,
4036         .update_pi_irte = svm_update_pi_irte,
4037         .setup_mce = svm_setup_mce,
4038
4039         .smi_allowed = svm_smi_allowed,
4040         .pre_enter_smm = svm_pre_enter_smm,
4041         .pre_leave_smm = svm_pre_leave_smm,
4042         .enable_smi_window = enable_smi_window,
4043
4044         .mem_enc_op = svm_mem_enc_op,
4045         .mem_enc_reg_region = svm_register_enc_region,
4046         .mem_enc_unreg_region = svm_unregister_enc_region,
4047
4048         .need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
4049
4050         .apic_init_signal_blocked = svm_apic_init_signal_blocked,
4051 };
4052
4053 static struct kvm_x86_init_ops svm_init_ops __initdata = {
4054         .cpu_has_kvm_support = has_svm,
4055         .disabled_by_bios = is_disabled,
4056         .hardware_setup = svm_hardware_setup,
4057         .check_processor_compatibility = svm_check_processor_compat,
4058
4059         .runtime_ops = &svm_x86_ops,
4060 };
4061
4062 static int __init svm_init(void)
4063 {
4064         return kvm_init(&svm_init_ops, sizeof(struct vcpu_svm),
4065                         __alignof__(struct vcpu_svm), THIS_MODULE);
4066 }
4067
4068 static void __exit svm_exit(void)
4069 {
4070         kvm_exit();
4071 }
4072
4073 module_init(svm_init)
4074 module_exit(svm_exit)