1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (C) 2018, Red Hat, Inc.
5 * Tests for Enlightened VMCS, including nested guest state.
7 #define _GNU_SOURCE /* for program_invocation_short_name */
12 #include <sys/ioctl.h>
13 #include <linux/bitmap.h>
15 #include "test_util.h"
/*
 * Guest exception handler that main() registers for UD_VECTOR.
 * Advances the saved RIP by 3 bytes, the length the inline comment attributes
 * to VMLAUNCH, so the faulting instruction is stepped over.
 * NOTE(review): source lines 25-26 are absent from this listing; the handler
 * may do more than is shown here (guest_code() checks a ud_count) - confirm.
 */
24 static void guest_ud_handler(struct ex_regs *regs)
27 regs->rip += 3; /* VMLAUNCH */
/*
 * Guest exception handler that main() registers for NMI_VECTOR.
 * NOTE(review): the body is not visible in this listing.
 */
30 static void guest_nmi_handler(struct ex_regs *regs)
/*
 * Execute RDMSR for @msr, which is loaded through the "c" register constraint.
 * The asm statement has no output operands, so the value read is discarded;
 * the instruction is issued only for its effect (the call sites annotate
 * whether each read is expected to be intercepted).
 */
34 static inline void rdmsr_from_l2(uint32_t msr)
36 /* Currently, L1 doesn't preserve GPRs during vmexits. */
/* Hence the wide clobber list: the compiler must not keep live values in these registers. */
37 __asm__ __volatile__ ("rdmsr" : : "c"(msr) :
38 "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
39 "r10", "r11", "r12", "r13", "r14", "r15");
42 /* Exit to L1 from L2 with RDMSR instruction */
/*
 * Code run as the nested (L2) guest; guest_code() hands it to prepare_vmcs().
 * The visible part issues four RDMSRs for the MSR-bitmap checks, followed by
 * two HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE fast hypercalls with an RDMSR in
 * between for the L2 TLB flush checks.
 * NOTE(review): several source lines (44-50, 52-55, 60, 62, 73-74, 76+) are
 * absent from this listing, so this is not the complete sequence.
 */
43 void l2_guest_code(void)
51 /* Forced exit to L1 upon restore */
56 /* MSR-Bitmap tests */
57 rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
58 rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
59 rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
61 rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
63 /* L2 TLB flush tests */
/* Checked variant of the hypercall: flush all address spaces on all processors. */
64 hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
65 HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS);
66 rdmsr_from_l2(MSR_FS_BASE);
68 * Note: hypercall status (RAX) is not preserved correctly by L1 after
69 * synthetic vmexit, use unchecked version.
71 __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
72 HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS,
75 /* Done, exit to L1 and never come back. */
/*
 * L1 guest entry point. main() passes this function to
 * vm_create_with_one_vcpu() and supplies the three arguments through
 * vcpu_args_set().
 *
 * @vmx_pages:         VMX test pages (used for VMX setup and the MSR bitmap).
 * @hv_pages:          Hyper-V test pages (VP assist, eVMCS, partition assist).
 * @hv_hcall_page_gpa: value written to HV_X64_MSR_HYPERCALL.
 *
 * NOTE(review): a number of source lines are absent from this listing, so
 * the sequence below is not the complete function body.
 */
79 void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages,
80 vm_vaddr_t hv_hcall_page_gpa)
82 #define L2_GUEST_STACK_SIZE 64
83 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
/* Announce the guest OS ID and program the hypercall MSR. */
85 wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
86 wrmsr(HV_X64_MSR_HYPERCALL, hv_hcall_page_gpa);
93 enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
96 GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
/* The current VMCS pointer must read back as the enlightened VMCS GPA. */
98 GUEST_ASSERT(load_evmcs(hv_pages));
99 GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
102 GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
/* L2 starts at l2_guest_code; the stack argument is one past the array's last element. */
104 prepare_vmcs(vmx_pages, l2_guest_code,
105 &l2_guest_stack[L2_GUEST_STACK_SIZE]);
108 GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
/*
 * With a bogus revision_id, vmlaunch() is asserted to return non-zero
 * (presumably "failed" - the successful launch below is asserted with
 * !vmlaunch()); the valid EVMCS_VERSION is then put back.
 */
109 current_evmcs->revision_id = -1u;
110 GUEST_ASSERT(vmlaunch());
111 current_evmcs->revision_id = EVMCS_VERSION;
/* Turn on NMI exiting in addition to the pin-based controls already set. */
114 vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
115 PIN_BASED_NMI_EXITING);
117 /* L2 TLB flush setup */
118 current_evmcs->partition_assist_page = hv_pages->partition_assist_gpa;
119 current_evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
120 current_evmcs->hv_vm_id = 1;
121 current_evmcs->hv_vp_id = 1;
122 current_vp_assist->nested_control.features.directhypercall = 1;
/* First 32-bit word of the partition assist page starts at 0; set to 1 later to "Enable synthetic vmexit". */
123 *(u32 *)(hv_pages->partition_assist) = 0;
/* The first successful launch is expected to exit with an NMI (low 8 bits of the interruption info hold the vector). */
125 GUEST_ASSERT(!vmlaunch());
126 GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
127 GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), NMI_VECTOR);
128 GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
131 * NMI forces L2->L1 exit, resuming L2 and hope that EVMCS is
132 * up-to-date (RIP points where it should and not at the beginning
133 * of l2_guest_code(). GUEST_SYNC(9) checkes that.
135 GUEST_ASSERT(!vmresume());
/* After each handled exit, guest_rip is advanced by the instruction length so L2 resumes after it. */
139 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
140 current_evmcs->guest_rip += 3; /* vmcall */
142 /* Intercept RDMSR 0xc0000100 */
143 vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
144 CPU_BASED_USE_MSR_BITMAPS);
/* NOTE(review): offset 0x400 is presumably the read bitmap for MSRs 0xc0000000 and up - confirm against the Intel SDM. */
145 __set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400);
146 GUEST_ASSERT(!vmresume());
147 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
148 current_evmcs->guest_rip += 2; /* rdmsr */
150 /* Enable enlightened MSR bitmap */
151 current_evmcs->hv_enlightenments_control.msr_bitmap = 1;
152 GUEST_ASSERT(!vmresume());
153 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
154 current_evmcs->guest_rip += 2; /* rdmsr */
156 /* Intercept RDMSR 0xc0000101 without telling KVM about it */
157 __set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400);
158 /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
159 current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
160 GUEST_ASSERT(!vmresume());
161 /* Make sure we don't see EXIT_REASON_MSR_READ here so eMSR bitmap works */
162 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
163 current_evmcs->guest_rip += 3; /* vmcall */
165 /* Now tell KVM we've changed MSR-Bitmap */
166 current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
167 GUEST_ASSERT(!vmresume());
168 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
169 current_evmcs->guest_rip += 2; /* rdmsr */
172 * L2 TLB flush test. First VMCALL should be handled directly by L0,
173 * no VMCALL exit expected.
175 GUEST_ASSERT(!vmresume());
176 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
177 current_evmcs->guest_rip += 2; /* rdmsr */
178 /* Enable synthetic vmexit */
179 *(u32 *)(hv_pages->partition_assist) = 1;
180 GUEST_ASSERT(!vmresume());
181 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH);
183 GUEST_ASSERT(!vmresume());
184 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
187 /* Try enlightened vmptrld with an incorrect GPA */
188 evmcs_vmptrld(0xdeadbeef, hv_pages->enlightened_vmcs);
/* vmlaunch() must return non-zero here, and ud_count (presumably bumped by guest_ud_handler - confirm) must be exactly 1. */
189 GUEST_ASSERT(vmlaunch());
190 GUEST_ASSERT(ud_count == 1);
/*
 * Mark an NMI as pending on @vcpu: fetch the current vCPU events, set
 * nmi.pending, flag that field as valid via KVM_VCPUEVENT_VALID_NMI_PENDING,
 * and write the events back.
 */
194 void inject_nmi(struct kvm_vcpu *vcpu)
196 struct kvm_vcpu_events events;
/* Start from the current events so only the NMI-pending state is changed. */
198 vcpu_events_get(vcpu, &events);
200 events.nmi.pending = 1;
201 events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
203 vcpu_events_set(vcpu, &events);
/*
 * Save the state of @vcpu, recreate @vm with a single vCPU, re-apply the
 * Hyper-V CPUID and eVMCS enablement on the new vCPU, load the saved state
 * into it, and assert that the general-purpose registers read back
 * byte-for-byte identical to the snapshot taken before the recreate.
 *
 * @vm:   VM to be recreated.
 * @vcpu: vCPU whose state is saved and then restored.
 *
 * NOTE(review): the return statement is not visible in this listing; main()
 * assigns the result to its vcpu, so presumably the new vCPU is returned.
 */
206 static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm,
207 struct kvm_vcpu *vcpu)
209 struct kvm_regs regs1, regs2;
210 struct kvm_x86_state *state;
212 state = vcpu_save_state(vcpu);
/* Zero first so the later memcmp() compares fully defined bytes. */
213 memset(&regs1, 0, sizeof(regs1));
214 vcpu_regs_get(vcpu, &regs1);
218 /* Restore state in a new VM. */
219 vcpu = vm_recreate_with_one_vcpu(vm);
/* Hyper-V CPUID and eVMCS are enabled on the new vCPU before its state is loaded. */
220 vcpu_set_hv_cpuid(vcpu);
221 vcpu_enable_evmcs(vcpu);
222 vcpu_load_state(vcpu, state);
223 kvm_x86_state_cleanup(state);
225 memset(&regs2, 0, sizeof(regs2));
226 vcpu_regs_get(vcpu, &regs2);
227 TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
228 "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
229 (ulong) regs2.rdi, (ulong) regs2.rsi);
/*
 * Test entry point. Requires VMX, KVM_CAP_NESTED_STATE,
 * KVM_CAP_HYPERV_ENLIGHTENED_VMCS and KVM_CAP_HYPERV_DIRECT_TLBFLUSH, then
 * creates a one-vCPU VM running guest_code(), enables the Hyper-V CPUID and
 * eVMCS on the vCPU, allocates the VMX and Hyper-V test pages, installs the
 * #UD and NMI guest handlers, and loops over stages handling guest ucalls
 * and save/restoring the VM between them.
 *
 * NOTE(review): many source lines (declarations, case labels, closing
 * braces, the end of the function) are absent from this listing.
 */
233 int main(int argc, char *argv[])
235 vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0;
236 vm_vaddr_t hcall_page;
238 struct kvm_vcpu *vcpu;
243 TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
244 TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
245 TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS));
246 TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_DIRECT_TLBFLUSH));
248 vm = vm_create_with_one_vcpu(&vcpu, guest_code);
/* One zero-filled guest page for the hypercall page. */
250 hcall_page = vm_vaddr_alloc_pages(vm, 1);
251 memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize());
253 vcpu_set_hv_cpuid(vcpu);
254 vcpu_enable_evmcs(vcpu);
256 vcpu_alloc_vmx(vm, &vmx_pages_gva);
257 vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
/* Three guest_code() arguments; the hypercall page is converted from GVA to GPA. */
258 vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
259 vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
261 vm_init_descriptor_tables(vm);
262 vcpu_init_descriptor_tables(vcpu);
263 vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
264 vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
266 pr_info("Running L1 which uses EVMCS to run L2\n");
/* No loop condition here: the exit path is not visible in this listing. */
268 for (stage = 1;; stage++) {
270 TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
272 switch (get_ucall(vcpu, &uc)) {
274 REPORT_GUEST_ASSERT(uc);
281 TEST_FAIL("Unknown ucall %lu", uc.cmd);
284 /* UCALL_SYNC is handled here. */
/* Each sync must carry the string "hello" and the current stage number. */
285 TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
286 uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
287 stage, (ulong)uc.args[1]);
/* Use the vCPU returned by save_restore_vm() from here on. */
289 vcpu = save_restore_vm(vm, vcpu);
291 /* Force immediate L2->L1 exit before resuming */
293 pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
298 * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly
299 * restored VM (before the first KVM_RUN) to check that
300 * KVM_STATE_NESTED_EVMCS is not lost.
303 pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n");
304 vcpu = save_restore_vm(vm, vcpu);