1 // SPDX-License-Identifier: GPL-2.0
3 * page_fault_test.c - Test stage 2 faults.
5 * This test tries different combinations of guest accesses (e.g., write,
6 * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on
7 * hugetlbfs with a hole). It checks that the expected handling method is
8 * called (e.g., uffd faults with the right address and write/read flag).
11 #include <linux/bitmap.h>
13 #include <test_util.h>
15 #include <processor.h>
16 #include <asm/sysreg.h>
17 #include <linux/bitfield.h>
18 #include "guest_modes.h"
19 #include "userfaultfd_util.h"
21 /* Guest virtual addresses that point to the test page and its PTE. */
22 #define TEST_GVA 0xc0000000
/* Address guest_exec() jumps to; the host copies code to the same offset. */
23 #define TEST_EXEC_GVA (TEST_GVA + 0x8)
/* Guest alias of the PTE that maps TEST_GVA (mapped by setup_gva_maps()). */
24 #define TEST_PTE_GVA 0xb0000000
/* Pattern stored by the write-type accesses and checked on read back. */
25 #define TEST_DATA 0x0123456789ABCDEF
/* Points at the first 64-bit word of the test page. */
27 static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA;
/*
 * Commands the guest passes to the host with GUEST_SYNC(); they are decoded
 * by handle_cmd(). Apart from CMD_SKIP_TEST they are tested as bit flags.
 */
/* The guest could not prepare the test: handle_cmd() reports "skip". */
30 #define CMD_SKIP_TEST (1ULL << 1)
/* Punch a hole in the backing store of the page-table memslot. */
31 #define CMD_HOLE_PT (1ULL << 2)
/* Punch a hole in the backing store of the test-data memslot. */
32 #define CMD_HOLE_DATA (1ULL << 3)
/* Assert that page 0 of the data memslot is dirty. */
33 #define CMD_CHECK_WRITE_IN_DIRTY_LOG (1ULL << 4)
/* Assert that the page holding the TEST_GVA PTE is dirty. */
34 #define CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG (1ULL << 5)
/* Assert that page 0 of the data memslot is NOT dirty. */
35 #define CMD_CHECK_NO_WRITE_IN_DIRTY_LOG (1ULL << 6)
/* Assert that the page holding the TEST_GVA PTE is NOT dirty. */
36 #define CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG (1ULL << 7)
/* NOTE(review): no handler for this flag is visible in this file section. */
37 #define CMD_SET_PTE_AF (1ULL << 8)
/* Capacity of the guest_prepare[] and guest_test_check[] arrays. */
39 #define PREPARE_FN_NR 10
40 #define CHECK_FN_NR 10
/*
 * Host-side counters of the events seen while a test runs (uffd faults, MMIO
 * exits, failed vcpu runs). check_event_counts() compares them against the
 * test's expected_events.
 */
42 static struct event_cnt {
46 /* uffd_faults is incremented from multiple threads. */
47 pthread_mutex_t uffd_faults_mutex;
/* Command the guest sends with GUEST_SYNC() right before running guest_test. */
52 uint64_t mem_mark_cmd;
53 /* Skip the test if any prepare function returns false */
54 bool (*guest_prepare[PREPARE_FN_NR])(void);
/* The guest access under test. */
55 void (*guest_test)(void);
/* Guest-side checks, walked by guest_test_check() after the access. */
56 void (*guest_test_check[CHECK_FN_NR])(void);
/* When non-NULL, setup_uffd() registers userfaultfd on the matching region. */
57 uffd_handler_t uffd_pt_handler;
58 uffd_handler_t uffd_data_handler;
/* NOTE(review): these two are not referenced in the visible code. */
59 void (*dabt_handler)(struct ex_regs *regs);
60 void (*iabt_handler)(struct ex_regs *regs);
/* Called by vcpu_run_loop() when the exit reason is KVM_EXIT_MMIO. */
61 void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run);
/* Called by vcpu_run_loop() with the value returned by _vcpu_run(). */
62 void (*fail_vcpu_run_handler)(int ret);
/* Flags passed to vm_userspace_mem_region_add() for each memslot. */
63 uint32_t pt_memslot_flags;
64 uint32_t data_memslot_flags;
/* Counts that check_event_counts() requires after the test has run. */
66 struct event_cnt expected_events;
/* Backing source used for the PT and test-data memslots. */
70 enum vm_mem_backing_src_type src_type;
/* The test to run with that backing source. */
71 struct test_desc *test_desc;
/*
 * Invalidate the TLB entries for the page containing @vaddr, using a TLBI by
 * VA for all ASIDs at EL1, Inner Shareable ("vaae1is").
 */
74 static inline void flush_tlb_page(uint64_t vaddr)
/* The TLBI operand is the virtual address shifted right by 12 bits. */
76 uint64_t page = vaddr >> 12;
79 asm volatile("tlbi vaae1is, %0" :: "r" (page));
/* Guest access: store TEST_DATA to the test page and verify the read back. */
84 static void guest_write64(void)
88 WRITE_ONCE(*guest_test_memory, TEST_DATA);
89 val = READ_ONCE(*guest_test_memory);
90 GUEST_ASSERT_EQ(val, TEST_DATA);
93 /* Check the system for atomic instructions. */
/*
 * Reads the ATOMIC field of ID_AA64ISAR0_EL1. Used both as the prepare step
 * of guest_cas and as an assertion inside guest_cas().
 */
94 static bool guest_check_lse(void)
96 uint64_t isar0 = read_sysreg(id_aa64isar0_el1);
99 atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0);
/*
 * Prepare step of guest_dc_zva: extracts the DZP field of DCZID_EL0.
 * NOTE(review): presumably the test is skipped when DZP says "dc zva" is
 * prohibited -- the return expression is not visible here, confirm.
 */
103 static bool guest_check_dc_zva(void)
105 uint64_t dczid = read_sysreg(dczid_el0);
106 uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid);
111 /* Compare and swap instruction. */
/*
 * Guest access: CASAL on the test page with compare value 0 and new value
 * TEST_DATA, then verify that the page now holds TEST_DATA.
 */
112 static void guest_cas(void)
/* The LSE atomics must be present before CASAL may be executed. */
116 GUEST_ASSERT(guest_check_lse());
117 asm volatile(".arch_extension lse\n"
118 "casal %0, %1, [%2]\n"
119 :: "r" (0ul), "r" (TEST_DATA), "r" (guest_test_memory));
120 val = READ_ONCE(*guest_test_memory);
121 GUEST_ASSERT_EQ(val, TEST_DATA);
/* Guest access: load the first word of the test page and expect zero. */
124 static void guest_read64(void)
128 val = READ_ONCE(*guest_test_memory);
129 GUEST_ASSERT_EQ(val, 0);
132 /* Address translation instruction */
/*
 * Guest access: run a stage 1 EL1 read translation ("at s1e1r") on the test
 * address and check through PAR_EL1 that it did not fault.
 */
133 static void guest_at(void)
137 asm volatile("at s1e1r, %0" :: "r" (guest_test_memory));
139 par = read_sysreg(par_el1);
141 /* Bit 0 (PAR_EL1.F) is set when the translation aborted */
142 GUEST_ASSERT_EQ(par & 1, 0);
146 * The size of the block written by "dc zva" is guaranteed to be between (2 <<
147 * 0) and (2 << 9), which is safe in our case as we need the write to happen
148 * for at least a word, and not more than a page.
/* Guest access: zero the block at the test address, then expect to read 0. */
150 static void guest_dc_zva(void)
154 asm volatile("dc zva, %0" :: "r" (guest_test_memory));
156 val = READ_ONCE(*guest_test_memory);
157 GUEST_ASSERT_EQ(val, 0);
161 * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0).
162 * And that's special because KVM must take special care with those: they
163 * should still count as accesses for dirty logging or user-faulting, but
164 * should be handled differently on mmio.
166 static void guest_ld_preidx(void)
169 uint64_t addr = TEST_GVA - 8;
172 * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is
173 * in a gap between memslots not backed by anything.
175 asm volatile("ldr %0, [%1, #8]!"
176 : "=r" (val), "+r" (addr));
177 GUEST_ASSERT_EQ(val, 0);
178 GUEST_ASSERT_EQ(addr, TEST_GVA);
/*
 * Guest access: pre-indexed store of TEST_DATA. The base starts at
 * TEST_GVA - 8 and the "#8" pre-index makes the store land on TEST_GVA.
 */
181 static void guest_st_preidx(void)
183 uint64_t val = TEST_DATA;
184 uint64_t addr = TEST_GVA - 8;
186 asm volatile("str %0, [%1, #8]!"
187 : "+r" (val), "+r" (addr));
/* The write-back must have advanced the base register to TEST_GVA. */
189 GUEST_ASSERT_EQ(addr, TEST_GVA);
190 val = READ_ONCE(*guest_test_memory);
/*
 * Prepare step of the "with_af" tests: check the HAFDBS field of
 * ID_AA64MMFR1_EL1 and set TCR_EL1_HA in TCR_EL1.
 */
193 static bool guest_set_ha(void)
195 uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1);
198 /* Skip if HA is not supported. */
199 hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1);
203 tcr = read_sysreg(tcr_el1) | TCR_EL1_HA;
204 write_sysreg(tcr, tcr_el1);
/*
 * Prepare step of the "with_af" tests: clear PTE_AF in the PTE that maps
 * TEST_GVA (reached through TEST_PTE_GVA) and flush the TLB for TEST_GVA.
 */
210 static bool guest_clear_pte_af(void)
212 *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF;
213 flush_tlb_page(TEST_GVA);
/* Check step of the "with_af" tests: PTE_AF must be set in the test PTE. */
218 static void guest_check_pte_af(void)
221 GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF);
/* Ask the host to assert that the test-data page is in the dirty log. */
224 static void guest_check_write_in_dirty_log(void)
226 GUEST_SYNC(CMD_CHECK_WRITE_IN_DIRTY_LOG);
/* Ask the host to assert that the test-data page is NOT in the dirty log. */
229 static void guest_check_no_write_in_dirty_log(void)
231 GUEST_SYNC(CMD_CHECK_NO_WRITE_IN_DIRTY_LOG);
/* Ask the host to assert that the page holding the test PTE is dirty. */
234 static void guest_check_s1ptw_wr_in_dirty_log(void)
236 GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG);
/* Ask the host to assert that the page holding the test PTE is NOT dirty. */
239 static void guest_check_no_s1ptw_wr_in_dirty_log(void)
241 GUEST_SYNC(CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG);
/*
 * Guest access: execute the code the host placed at TEST_EXEC_GVA (see
 * load_exec_code_for_test()) and expect it to yield 0x77.
 */
244 static void guest_exec(void)
246 int (*code)(void) = (int (*)(void))TEST_EXEC_GVA;
250 GUEST_ASSERT_EQ(ret, 0x77);
/*
 * Run the prepare functions of @test in array order; NULL slots are ignored.
 * guest_code() requests CMD_SKIP_TEST when this returns false.
 */
253 static bool guest_prepare(struct test_desc *test)
255 bool (*prepare_fn)(void);
258 for (i = 0; i < PREPARE_FN_NR; i++) {
259 prepare_fn = test->guest_prepare[i];
/* Only a non-NULL prepare function that returns false takes this branch. */
260 if (prepare_fn && !prepare_fn())
/* Walk the guest_test_check[] array of @test in order. */
267 static void guest_test_check(struct test_desc *test)
269 void (*check_fn)(void);
272 for (i = 0; i < CHECK_FN_NR; i++) {
273 check_fn = test->guest_test_check[i];
/*
 * Guest entry point. Sequence: prepare (or ask the host to skip), send the
 * memory-marking command, perform the access under test, run the checks.
 */
279 static void guest_code(struct test_desc *test)
281 if (!guest_prepare(test))
282 GUEST_SYNC(CMD_SKIP_TEST);
/* Let the host mark the memory (e.g., punch holes) before the access. */
284 GUEST_SYNC(test->mem_mark_cmd);
286 if (test->guest_test)
289 guest_test_check(test);
/* Guest data-abort handler: any data abort fails the test, reporting FAR_EL1. */
293 static void no_dabt_handler(struct ex_regs *regs)
295 GUEST_FAIL("Unexpected dabt, far_el1 = 0x%llx", read_sysreg(far_el1));
/* Guest instruction-abort handler: any such abort fails the test, reporting the PC. */
298 static void no_iabt_handler(struct ex_regs *regs)
300 GUEST_FAIL("Unexpected iabt, pc = 0x%lx", regs->pc);
/*
 * Per-region userfaultfd state, one instance for the page-table region and
 * one for the test-data region. Filled in by setup_uffd_args().
 */
303 static struct uffd_args {
/* Size of the whole region; used as the UFFDIO_COPY length. */
306 uint64_t paging_size;
307 } pt_args, data_args;
/*
 * Common userfaultfd handler for the page-table and test-data regions.
 *
 * Checks that the fault is a MISSING fault at the start of the region
 * (args->hva), issues UFFDIO_COPY with the saved copy of the region
 * (args->copy, args->paging_size bytes) as the source, and counts the fault in
 * events.uffd_faults under uffd_faults_mutex.
 *
 * @uffd_mode: registration mode; only UFFDIO_REGISTER_MODE_MISSING is expected.
 * @uffd: userfaultfd file descriptor the request is issued on.
 * @msg: the fault message being handled.
 * @args: state of the faulting region, see setup_uffd_args().
 *
 * NOTE(review): the function returns an int, not a bool; the exact values
 * follow the uffd_handler_t convention -- confirm against userfaultfd_util.h.
 */
310 static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg,
311 struct uffd_args *args)
313 uint64_t addr = msg->arg.pagefault.address;
314 uint64_t flags = msg->arg.pagefault.flags;
315 struct uffdio_copy copy;
318 TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING,
319 "The only expected UFFD mode is MISSING");
320 TEST_ASSERT_EQ(addr, (uint64_t)args->hva);
322 pr_debug("uffd fault: addr=%p write=%d\n",
323 (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE));
325 copy.src = (uint64_t)args->copy;
327 copy.len = args->paging_size;
/* UFFDIO_COPY takes a pointer to the struct uffdio_copy request. */
330 ret = ioctl(uffd, UFFDIO_COPY, &copy);
332 pr_info("Failed UFFDIO_COPY in 0x%lx with errno: %d\n",
/* Handlers run on separate threads, hence the lock around the counter. */
337 pthread_mutex_lock(&events.uffd_faults_mutex);
338 events.uffd_faults += 1;
339 pthread_mutex_unlock(&events.uffd_faults_mutex);
/* userfaultfd handler for the page-table region. */
343 static int uffd_pt_handler(int mode, int uffd, struct uffd_msg *msg)
345 return uffd_generic_handler(mode, uffd, msg, &pt_args);
/* userfaultfd handler for the test-data region. */
348 static int uffd_data_handler(int mode, int uffd, struct uffd_msg *msg)
350 return uffd_generic_handler(mode, uffd, msg, &data_args);
/*
 * Record the host address and size of @region in @args and take a private
 * copy of its current contents. The copy is malloc()ed here and is the
 * source of the later UFFDIO_COPY.
 */
353 static void setup_uffd_args(struct userspace_mem_region *region,
354 struct uffd_args *args)
356 args->hva = (void *)region->region.userspace_addr;
357 args->paging_size = region->region.memory_size;
359 args->copy = malloc(args->paging_size);
360 TEST_ASSERT(args->copy, "Failed to allocate data copy.");
361 memcpy(args->copy, args->hva, args->paging_size);
/*
 * Snapshot the PT and test-data regions, then register userfaultfd demand
 * paging (MISSING mode) on each region for which the test has a handler.
 *
 * @pt_uffd, @data_uffd: receive the descriptors returned by
 * uffd_setup_demand_paging() for the regions that were registered.
 */
364 static void setup_uffd(struct kvm_vm *vm, struct test_params *p,
365 struct uffd_desc **pt_uffd, struct uffd_desc **data_uffd)
367 struct test_desc *test = p->test_desc;
368 int uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
370 setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_PT), &pt_args);
371 setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_TEST_DATA), &data_args);
374 if (test->uffd_pt_handler)
375 *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0,
378 test->uffd_pt_handler);
381 if (test->uffd_data_handler)
382 *data_uffd = uffd_setup_demand_paging(uffd_mode, 0,
384 data_args.paging_size,
385 test->uffd_data_handler);
/*
 * Stop demand paging on the regions that setup_uffd() registered (the same
 * handler checks are used) and release the region snapshot(s).
 */
388 static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd,
389 struct uffd_desc *data_uffd)
391 if (test->uffd_pt_handler)
392 uffd_stop_demand_paging(pt_uffd);
393 if (test->uffd_data_handler)
394 uffd_stop_demand_paging(data_uffd);
397 free(data_args.copy);
/* userfaultfd handler for regions where no fault may happen: fails the test. */
400 static int uffd_no_handler(int mode, int uffd, struct uffd_msg *msg)
402 TEST_FAIL("There was no UFFD fault expected.");
406 /* Returns false if the test should be skipped. */
/*
 * Drop the backing memory of @region so that the next guest access faults.
 * Two mechanisms appear below: fallocate(FALLOC_FL_PUNCH_HOLE) on the
 * region's fd and madvise(MADV_DONTNEED) on its host mapping.
 * NOTE(review): which one is used for a given region depends on a condition
 * that is not visible here -- presumably whether the region has an fd.
 */
407 static bool punch_hole_in_backing_store(struct kvm_vm *vm,
408 struct userspace_mem_region *region)
410 void *hva = (void *)region->region.userspace_addr;
411 uint64_t paging_size = region->region.memory_size;
412 int ret, fd = region->fd;
/* KEEP_SIZE: deallocate the range without changing the file size. */
415 ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
417 TEST_ASSERT(ret == 0, "fallocate failed");
419 ret = madvise(hva, paging_size, MADV_DONTNEED);
420 TEST_ASSERT(ret == 0, "madvise failed");
/*
 * MMIO exit handler for tests that expect one: the exit must target the
 * guest physical address of the test-data region. The MMIO payload is copied
 * into the region's host mapping and the exit is counted.
 */
426 static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run)
428 struct userspace_mem_region *region;
431 region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
432 hva = (void *)region->region.userspace_addr;
434 TEST_ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr);
436 memcpy(hva, run->mmio.data, run->mmio.len);
437 events.mmio_exits += 1;
/*
 * Default MMIO exit handler (see setup_default_handlers()): logs the access
 * and fails the test, as no MMIO exit was expected.
 */
440 static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run)
444 memcpy(&data, run->mmio.data, sizeof(data));
445 pr_debug("addr=%lld len=%d w=%d data=%lx\n",
446 run->mmio.phys_addr, run->mmio.len,
447 run->mmio.is_write, data);
448 TEST_FAIL("There was no MMIO exit expected.");
/*
 * Fetch the dirty log of @region's memslot and return whether the bit for
 * host page number host_pg_nr is set.
 */
451 static bool check_write_in_dirty_log(struct kvm_vm *vm,
452 struct userspace_mem_region *region,
456 bool first_page_dirty;
457 uint64_t size = region->region.memory_size;
459 /* getpagesize() is not always equal to vm->page_size */
/* One bit per host page of the region. */
460 bmap = bitmap_zalloc(size / getpagesize());
461 kvm_vm_get_dirty_log(vm, region->region.slot, bmap);
462 first_page_dirty = test_bit(host_pg_nr, bmap);
464 return first_page_dirty;
/*
 * Execute a command sent by the guest with GUEST_SYNC().
 *
 * @vm: the test VM.
 * @cmd: CMD_SKIP_TEST, or a combination of the other CMD_* flags.
 *
 * Hole punching is applied to the page-table and/or test-data region; the
 * dirty-log checks assert on page 0 of the data region and on the host page
 * holding the PTE of TEST_GVA in the page-table region.
 *
 * Returns true to continue the test, and false if it should be skipped. A
 * skip requested while punching either region is preserved: the second punch
 * can no longer overwrite the result of the first.
 */
468 static bool handle_cmd(struct kvm_vm *vm, int cmd)
470 struct userspace_mem_region *data_region, *pt_region;
471 bool continue_test = true;
472 uint64_t pte_gpa, pte_pg;
474 data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
475 pt_region = vm_get_mem_region(vm, MEM_REGION_PT);
476 pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
/* Index, in host pages, of the PTE's page within the page-table region. */
477 pte_pg = (pte_gpa - pt_region->region.guest_phys_addr) / getpagesize();
479 if (cmd == CMD_SKIP_TEST)
480 continue_test = false;
482 if (cmd & CMD_HOLE_PT)
483 continue_test &= punch_hole_in_backing_store(vm, pt_region);
484 if (cmd & CMD_HOLE_DATA)
485 continue_test &= punch_hole_in_backing_store(vm, data_region);
486 if (cmd & CMD_CHECK_WRITE_IN_DIRTY_LOG)
487 TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0),
488 "Missing write in dirty log");
489 if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG)
490 TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, pte_pg),
491 "Missing s1ptw write in dirty log");
492 if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG)
493 TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0),
494 "Unexpected write in dirty log");
495 if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG)
496 TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, pte_pg),
497 "Unexpected s1ptw write in dirty log");
499 return continue_test;
/* Default failed-vcpu-run handler: no failure was expected, so fail the test. */
502 void fail_vcpu_run_no_handler(int ret)
504 TEST_FAIL("Unexpected vcpu run failure");
/*
 * Failed-vcpu-run handler of the "no syndrome" read-only memslot tests: the
 * run must have failed with ENOSYS; the failure is counted.
 */
507 void fail_vcpu_run_mmio_no_syndrome_handler(int ret)
509 TEST_ASSERT(errno == ENOSYS,
510 "The mmio handler should have returned not implemented.");
511 events.fail_vcpu_runs += 1;
/* One AArch64 instruction word. */
514 typedef uint32_t aarch64_insn_t;
/* The two instructions at the __exec_test label defined in the asm below. */
515 extern aarch64_insn_t __exec_test[2];
/*
 * Container for the code executed by the exec tests: the __exec_test label
 * marks a sequence that starts by moving 0x77 into x0. The host copies that
 * sequence into the test page (load_exec_code_for_test()).
 */
517 noinline void __return_0x77(void)
519 asm volatile("__exec_test: mov x0, #0x77\n"
524 * Note that this function runs on the host before the test VM starts: there's
525 * no need to sync the D$ and I$ caches.
/*
 * Copy the __exec_test instructions into the host mapping of the test-data
 * region, at the offset that the guest sees as TEST_EXEC_GVA.
 */
527 static void load_exec_code_for_test(struct kvm_vm *vm)
530 struct userspace_mem_region *region;
533 region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
534 hva = (void *)region->region.userspace_addr;
/* The offset computed below must be positive. */
536 assert(TEST_EXEC_GVA > TEST_GVA);
537 code = hva + TEST_EXEC_GVA - TEST_GVA;
538 memcpy(code, __exec_test, sizeof(__exec_test));
/*
 * Initialize the guest exception tables and install the handlers that turn
 * any data or instruction abort taken by the guest into a test failure.
 */
541 static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
542 struct test_desc *test)
544 vm_init_descriptor_tables(vm);
545 vcpu_init_descriptor_tables(vcpu);
547 vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
548 ESR_EC_DABT, no_dabt_handler);
549 vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
550 ESR_EC_IABT, no_iabt_handler);
/*
 * Create the two guest mappings used by the tests: TEST_GVA onto the start
 * of the test-data region, and TEST_PTE_GVA onto the PTE of that mapping.
 */
553 static void setup_gva_maps(struct kvm_vm *vm)
555 struct userspace_mem_region *region;
558 region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
559 /* Map TEST_GVA first. This will install a new PTE. */
560 virt_pg_map(vm, TEST_GVA, region->region.guest_phys_addr);
561 /* Then map TEST_PTE_GVA to the above PTE. */
562 pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
563 virt_pg_map(vm, TEST_PTE_GVA, pte_gpa);
/* Memslot ids used by setup_memslots(). */
566 enum pf_test_memslots {
/* Slot shared by MEM_REGION_CODE and MEM_REGION_DATA. */
567 CODE_AND_DATA_MEMSLOT,
573 * Create a memslot for code and data at pfn=0, and test-data and PT ones
/*
 * Add the three memslots of the test VM and bind them to the MEM_REGION_*
 * ids: anonymous code+data at GPA 0, and the page-table and test-data slots
 * placed back to back below max_gfn, both backed by p->src_type and created
 * with the memslot flags of the test.
 */
576 static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
578 uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type);
579 uint64_t guest_page_size = vm->page_size;
580 uint64_t max_gfn = vm_compute_max_gfn(vm);
581 /* Enough for 2M of code when using 4K guest pages. */
582 uint64_t code_npages = 512;
583 uint64_t pt_size, data_size, data_gpa;
586 * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using
587 * VM_MODE_P48V48_4K. Note that the .text takes ~1.6MBs. That's 13
588 * pages. VM_MODE_P48V48_4K is the mode with most PT pages; let's use
589 * twice that just in case.
591 pt_size = 26 * guest_page_size;
593 /* memslot sizes and gpa's must be aligned to the backing page size */
594 pt_size = align_up(pt_size, backing_src_pagesz);
595 data_size = align_up(guest_page_size, backing_src_pagesz);
/* The data slot ends at (or just below) the highest guest frame. */
596 data_gpa = (max_gfn * guest_page_size) - data_size;
597 data_gpa = align_down(data_gpa, backing_src_pagesz);
599 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0,
600 CODE_AND_DATA_MEMSLOT, code_npages, 0);
601 vm->memslots[MEM_REGION_CODE] = CODE_AND_DATA_MEMSLOT;
602 vm->memslots[MEM_REGION_DATA] = CODE_AND_DATA_MEMSLOT;
/* The page-table slot sits immediately below the data slot. */
604 vm_userspace_mem_region_add(vm, p->src_type, data_gpa - pt_size,
605 PAGE_TABLE_MEMSLOT, pt_size / guest_page_size,
606 p->test_desc->pt_memslot_flags);
607 vm->memslots[MEM_REGION_PT] = PAGE_TABLE_MEMSLOT;
609 vm_userspace_mem_region_add(vm, p->src_type, data_gpa, TEST_DATA_MEMSLOT,
610 data_size / guest_page_size,
611 p->test_desc->data_memslot_flags);
612 vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
/*
 * Initialize ucall with the guest physical address that immediately follows
 * the test-data region.
 */
615 static void setup_ucall(struct kvm_vm *vm)
617 struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
619 ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size);
/*
 * Give @test the "nothing expected" MMIO and failed-vcpu-run handlers when
 * its descriptor leaves them NULL, so the run loop can call them directly.
 */
622 static void setup_default_handlers(struct test_desc *test)
624 if (!test->mmio_handler)
625 test->mmio_handler = mmio_no_handler;
627 if (!test->fail_vcpu_run_handler)
628 test->fail_vcpu_run_handler = fail_vcpu_run_no_handler;
/* Assert that every event counter matches what @test expects. */
631 static void check_event_counts(struct test_desc *test)
633 TEST_ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);
634 TEST_ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits);
635 TEST_ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs);
/* Log (at debug level) the test name, guest mode and backing source type. */
638 static void print_test_banner(enum vm_guest_mode mode, struct test_params *p)
640 struct test_desc *test = p->test_desc;
642 pr_debug("Test: %s\n", test->name);
643 pr_debug("Testing guest mode: %s\n", vm_guest_mode_string(mode));
644 pr_debug("Testing memory backing src type: %s\n",
645 vm_mem_backing_src_alias(p->src_type)->name);
/* Zero the whole events structure before a test runs. */
648 static void reset_event_counts(void)
650 memset(&events, 0, sizeof(events));
654 * This function either succeeds, skips the test (after setting test->skip), or
655 * fails with a TEST_FAIL that aborts all tests.
/*
 * Run the vcpu and dispatch on what comes back: the failed-run handler of
 * the test, guest commands (handle_cmd()), guest assertion reports, and MMIO
 * exits (the test's mmio_handler). An unknown ucall fails the test.
 */
657 static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
658 struct test_desc *test)
667 ret = _vcpu_run(vcpu);
669 test->fail_vcpu_run_handler(ret);
673 switch (get_ucall(vcpu, &uc)) {
/* The command sent by GUEST_SYNC() is carried in uc.args[1]. */
675 if (!handle_cmd(vm, uc.args[1])) {
681 REPORT_GUEST_ASSERT(uc);
686 if (run->exit_reason == KVM_EXIT_MMIO)
687 test->mmio_handler(vm, run);
690 TEST_FAIL("Unknown ucall %lu", uc.cmd);
695 pr_debug(test->skip ? "Skipped.\n" : "Done.\n");
/*
 * Run one test in one guest mode. Signature matches what
 * for_each_guest_mode() calls; @arg is the struct test_params of the test.
 *
 * Order: create the VM and its memslots, load the ELF, add the vcpu, reset
 * the counters, place the exec code, set up userfaultfd and handlers, run,
 * tear userfaultfd down, then check the event counters.
 */
698 static void run_test(enum vm_guest_mode mode, void *arg)
700 struct test_params *p = (struct test_params *)arg;
701 struct test_desc *test = p->test_desc;
703 struct kvm_vcpu *vcpu;
704 struct uffd_desc *pt_uffd, *data_uffd;
706 print_test_banner(mode, p);
708 vm = ____vm_create(VM_SHAPE(mode));
709 setup_memslots(vm, p);
710 kvm_vm_elf_load(vm, program_invocation_name);
712 vcpu = vm_vcpu_add(vm, 0, guest_code);
716 reset_event_counts();
719 * Set some code in the data memslot for the guest to execute (only
720 * applicable to the EXEC tests). This has to be done before
721 * setup_uffd() as that function copies the memslot data for the uffd
724 load_exec_code_for_test(vm);
725 setup_uffd(vm, p, &pt_uffd, &data_uffd);
726 setup_abort_handlers(vm, vcpu, test);
727 setup_default_handlers(test);
/* The test descriptor is the single argument of guest_code(). */
728 vcpu_args_set(vcpu, 1, test);
730 vcpu_run_loop(vm, vcpu, test);
733 free_uffd(test, pt_uffd, data_uffd);
736 * Make sure we check the events after the uffd threads have exited,
737 * which means they updated their respective event counters.
740 check_event_counts(test);
/* Print the usage line and the help text of the "-s" backing source option. */
743 static void help(char *name)
746 printf("usage: %s [-h] [-s mem-type]\n", name);
749 backing_src_help("-s");
/*
 * Token-pasting helpers that build a test name out of 2, 3 or 4 parts joined
 * with '_'; the result is handed to SNAME().
 */
754 #define SCAT2(a, b) SNAME(a ## _ ## b)
755 #define SCAT3(a, b, c) SCAT2(a, SCAT2(b, c))
756 #define SCAT4(a, b, c, d) SCAT2(a, SCAT3(b, c, d))
/* Map a macro argument to its _CHECK_<arg> / _PREPARE_<arg> definition. */
758 #define _CHECK(_test) _CHECK_##_test
759 #define _PREPARE(_test) _PREPARE_##_test
/*
 * Prepare function of each access type, selected by _PREPARE(<access>).
 * NULL means the access needs no preparation.
 */
760 #define _PREPARE_guest_read64 NULL
761 #define _PREPARE_guest_ld_preidx NULL
762 #define _PREPARE_guest_write64 NULL
763 #define _PREPARE_guest_st_preidx NULL
764 #define _PREPARE_guest_exec NULL
765 #define _PREPARE_guest_at NULL
766 #define _PREPARE_guest_dc_zva guest_check_dc_zva
767 #define _PREPARE_guest_cas guest_check_lse
769 /* With or without access flag checks */
/*
 * "with_af" expands to two prepare functions (set TCR_EL1_HA, clear the AF
 * of the test PTE) and one check (the AF is set after the access); "no_af"
 * expands to NULL for both.
 */
770 #define _PREPARE_with_af guest_set_ha, guest_clear_pte_af
771 #define _PREPARE_no_af NULL
772 #define _CHECK_with_af guest_check_pte_af
773 #define _CHECK_no_af NULL
775 /* Performs an access and checks that no faults were triggered. */
/*
 * @_access: guest access function; @_with_af: with_af or no_af;
 * @_mark_cmd: command sent to the host before the access.
 * All expected event counts are zero.
 */
776 #define TEST_ACCESS(_access, _with_af, _mark_cmd) \
778 .name = SCAT3(_access, _with_af, #_mark_cmd), \
779 .guest_prepare = { _PREPARE(_with_af), \
780 _PREPARE(_access) }, \
781 .mem_mark_cmd = _mark_cmd, \
782 .guest_test = _access, \
783 .guest_test_check = { _CHECK(_with_af) }, \
784 .expected_events = { 0 }, \
/*
 * Performs an access with userfaultfd registered on the data and PT regions
 * through the given handlers, and expects exactly _uffd_faults faults.
 */
787 #define TEST_UFFD(_access, _with_af, _mark_cmd, \
788 _uffd_data_handler, _uffd_pt_handler, _uffd_faults) \
790 .name = SCAT4(uffd, _access, _with_af, #_mark_cmd), \
791 .guest_prepare = { _PREPARE(_with_af), \
792 _PREPARE(_access) }, \
793 .guest_test = _access, \
794 .mem_mark_cmd = _mark_cmd, \
795 .guest_test_check = { _CHECK(_with_af) }, \
796 .uffd_data_handler = _uffd_data_handler, \
797 .uffd_pt_handler = _uffd_pt_handler, \
798 .expected_events = { .uffd_faults = _uffd_faults, }, \
/*
 * Performs an access with dirty logging enabled on both the data and PT
 * memslots. _test_check and _pt_check are the guest-side dirty-log checks
 * for the data page and for the page-table page.
 */
801 #define TEST_DIRTY_LOG(_access, _with_af, _test_check, _pt_check) \
803 .name = SCAT3(dirty_log, _access, _with_af), \
804 .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
805 .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
806 .guest_prepare = { _PREPARE(_with_af), \
807 _PREPARE(_access) }, \
808 .guest_test = _access, \
809 .guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \
810 .expected_events = { 0 }, \
/*
 * Combines dirty logging on both memslots with holes punched in both regions
 * and userfaultfd handling. The PT region always uses uffd_pt_handler; the
 * data handler and the expected number of faults are parameters.
 */
813 #define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler, \
814 _uffd_faults, _test_check, _pt_check) \
816 .name = SCAT3(uffd_and_dirty_log, _access, _with_af), \
817 .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
818 .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
819 .guest_prepare = { _PREPARE(_with_af), \
820 _PREPARE(_access) }, \
821 .guest_test = _access, \
822 .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
823 .guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \
824 .uffd_data_handler = _uffd_data_handler, \
825 .uffd_pt_handler = uffd_pt_handler, \
826 .expected_events = { .uffd_faults = _uffd_faults, }, \
/*
 * Performs an access with both memslots created KVM_MEM_READONLY and expects
 * _mmio_exits MMIO exits, handled by _mmio_handler.
 */
829 #define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits) \
831 .name = SCAT2(ro_memslot, _access), \
832 .data_memslot_flags = KVM_MEM_READONLY, \
833 .pt_memslot_flags = KVM_MEM_READONLY, \
834 .guest_prepare = { _PREPARE(_access) }, \
835 .guest_test = _access, \
836 .mmio_handler = _mmio_handler, \
837 .expected_events = { .mmio_exits = _mmio_exits }, \
/*
 * Read-only memslots with an access that is expected to make the vcpu run
 * fail exactly once (checked by fail_vcpu_run_mmio_no_syndrome_handler).
 */
840 #define TEST_RO_MEMSLOT_NO_SYNDROME(_access) \
842 .name = SCAT2(ro_memslot_no_syndrome, _access), \
843 .data_memslot_flags = KVM_MEM_READONLY, \
844 .pt_memslot_flags = KVM_MEM_READONLY, \
845 .guest_prepare = { _PREPARE(_access) }, \
846 .guest_test = _access, \
847 .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
848 .expected_events = { .fail_vcpu_runs = 1 }, \
/*
 * Read-only memslots that are also marked for dirty logging. Expects
 * _mmio_exits MMIO exits (handled by _mmio_handler) and runs _test_check as
 * the guest-side dirty-log check.
 *
 * The name carries an "and_dlog" part so that these tests do not print the
 * same banner as the plain TEST_RO_MEMSLOT ones.
 */
851 #define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits, \
854 .name = SCAT2(ro_memslot_and_dlog, _access), \
855 .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
856 .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
857 .guest_prepare = { _PREPARE(_access) }, \
858 .guest_test = _access, \
859 .guest_test_check = { _test_check }, \
860 .mmio_handler = _mmio_handler, \
861 .expected_events = { .mmio_exits = _mmio_exits}, \
/*
 * Read-only memslots marked for dirty logging, with an access that is
 * expected to make the vcpu run fail exactly once. _test_check is the
 * guest-side dirty-log check.
 */
864 #define TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(_access, _test_check) \
866 .name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \
867 .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
868 .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
869 .guest_prepare = { _PREPARE(_access) }, \
870 .guest_test = _access, \
871 .guest_test_check = { _test_check }, \
872 .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
873 .expected_events = { .fail_vcpu_runs = 1 }, \
/*
 * Read-only memslots with holes punched in both regions and userfaultfd
 * handling. Expects _mmio_exits MMIO exits and _uffd_faults uffd faults; the
 * PT region always uses uffd_pt_handler.
 */
876 #define TEST_RO_MEMSLOT_AND_UFFD(_access, _mmio_handler, _mmio_exits, \
877 _uffd_data_handler, _uffd_faults) \
879 .name = SCAT2(ro_memslot_uffd, _access), \
880 .data_memslot_flags = KVM_MEM_READONLY, \
881 .pt_memslot_flags = KVM_MEM_READONLY, \
882 .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
883 .guest_prepare = { _PREPARE(_access) }, \
884 .guest_test = _access, \
885 .uffd_data_handler = _uffd_data_handler, \
886 .uffd_pt_handler = uffd_pt_handler, \
887 .mmio_handler = _mmio_handler, \
888 .expected_events = { .mmio_exits = _mmio_exits, \
889 .uffd_faults = _uffd_faults }, \
/*
 * Read-only memslots with holes punched in both regions and userfaultfd
 * handling, for accesses expected to make the vcpu run fail exactly once.
 * Expects _uffd_faults uffd faults; the PT region always uses uffd_pt_handler.
 *
 * The name carries a "uffd" part so that these tests do not print the same
 * banner as the TEST_RO_MEMSLOT_NO_SYNDROME ones.
 */
892 #define TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(_access, _uffd_data_handler, \
895 .name = SCAT2(ro_memslot_no_syndrome_uffd, _access), \
896 .data_memslot_flags = KVM_MEM_READONLY, \
897 .pt_memslot_flags = KVM_MEM_READONLY, \
898 .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
899 .guest_prepare = { _PREPARE(_access) }, \
900 .guest_test = _access, \
901 .uffd_data_handler = _uffd_data_handler, \
902 .uffd_pt_handler = uffd_pt_handler, \
903 .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
904 .expected_events = { .fail_vcpu_runs = 1, \
905 .uffd_faults = _uffd_faults }, \
908 static struct test_desc tests[] = {
910 /* Check that HW is setting the Access Flag (AF) (sanity checks). */
911 TEST_ACCESS(guest_read64, with_af, CMD_NONE),
912 TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE),
913 TEST_ACCESS(guest_cas, with_af, CMD_NONE),
914 TEST_ACCESS(guest_write64, with_af, CMD_NONE),
915 TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE),
916 TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE),
917 TEST_ACCESS(guest_exec, with_af, CMD_NONE),
920 * Punch a hole in the data backing store, and then try multiple
921 * accesses: reads should return zeroes, and writes should
922 * re-populate the page. Moreover, the test also checks that no
923 * exception was generated in the guest. Note that this
924 * reading/writing behavior is the same as reading/writing a
925 * punched page (with fallocate(FALLOC_FL_PUNCH_HOLE)) from
928 TEST_ACCESS(guest_read64, no_af, CMD_HOLE_DATA),
929 TEST_ACCESS(guest_cas, no_af, CMD_HOLE_DATA),
930 TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_DATA),
931 TEST_ACCESS(guest_write64, no_af, CMD_HOLE_DATA),
932 TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_DATA),
933 TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA),
934 TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA),
937 * Punch holes in the data and PT backing stores and mark them for
938 * userfaultfd handling. This should result in 2 faults: the access
939 * on the data backing store, and its respective S1 page table walk
942 TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
943 uffd_data_handler, uffd_pt_handler, 2),
944 TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
945 uffd_data_handler, uffd_pt_handler, 2),
946 TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
947 uffd_data_handler, uffd_pt_handler, 2),
949 * Can't test guest_at with_af as it's IMPDEF whether the AF is set.
950 * The S1PTW fault should still be marked as a write.
952 TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
953 uffd_no_handler, uffd_pt_handler, 1),
954 TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
955 uffd_data_handler, uffd_pt_handler, 2),
956 TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
957 uffd_data_handler, uffd_pt_handler, 2),
958 TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
959 uffd_data_handler, uffd_pt_handler, 2),
960 TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
961 uffd_data_handler, uffd_pt_handler, 2),
962 TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
963 uffd_data_handler, uffd_pt_handler, 2),
966 * Try accesses when the data and PT memory regions are both
967 * tracked for dirty logging.
969 TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log,
970 guest_check_s1ptw_wr_in_dirty_log),
971 TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log,
972 guest_check_no_s1ptw_wr_in_dirty_log),
973 TEST_DIRTY_LOG(guest_ld_preidx, with_af,
974 guest_check_no_write_in_dirty_log,
975 guest_check_s1ptw_wr_in_dirty_log),
976 TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log,
977 guest_check_no_s1ptw_wr_in_dirty_log),
978 TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log,
979 guest_check_s1ptw_wr_in_dirty_log),
980 TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log,
981 guest_check_s1ptw_wr_in_dirty_log),
982 TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log,
983 guest_check_s1ptw_wr_in_dirty_log),
984 TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log,
985 guest_check_s1ptw_wr_in_dirty_log),
986 TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log,
987 guest_check_s1ptw_wr_in_dirty_log),
990 * Access when the data and PT memory regions are both marked for
991 * dirty logging and UFFD at the same time. The expected result is
992 * that writes should mark the dirty log and trigger a userfaultfd
993 * write fault. Reads/execs should result in a read userfaultfd
994 * fault, and nothing in the dirty log. Any S1PTW should result in
995 * a write in the dirty log and a userfaultfd write.
997 TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af,
998 uffd_data_handler, 2,
999 guest_check_no_write_in_dirty_log,
1000 guest_check_s1ptw_wr_in_dirty_log),
1001 TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af,
1002 uffd_data_handler, 2,
1003 guest_check_no_write_in_dirty_log,
1004 guest_check_no_s1ptw_wr_in_dirty_log),
1005 TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af,
1007 2, guest_check_no_write_in_dirty_log,
1008 guest_check_s1ptw_wr_in_dirty_log),
1009 TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, uffd_no_handler, 1,
1010 guest_check_no_write_in_dirty_log,
1011 guest_check_s1ptw_wr_in_dirty_log),
1012 TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af,
1013 uffd_data_handler, 2,
1014 guest_check_no_write_in_dirty_log,
1015 guest_check_s1ptw_wr_in_dirty_log),
1016 TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af,
1018 2, guest_check_write_in_dirty_log,
1019 guest_check_s1ptw_wr_in_dirty_log),
1020 TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af,
1021 uffd_data_handler, 2,
1022 guest_check_write_in_dirty_log,
1023 guest_check_s1ptw_wr_in_dirty_log),
1024 TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af,
1026 2, guest_check_write_in_dirty_log,
1027 guest_check_s1ptw_wr_in_dirty_log),
1028 TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af,
1029 uffd_data_handler, 2,
1030 guest_check_write_in_dirty_log,
1031 guest_check_s1ptw_wr_in_dirty_log),
1033 * Access when both the PT and data regions are marked read-only
1034 * (with KVM_MEM_READONLY). Writes with a syndrome result in an
1035 * MMIO exit, writes with no syndrome (e.g., CAS) result in a
1036 * failed vcpu run, and reads/execs with and without syndromes do
1039 TEST_RO_MEMSLOT(guest_read64, 0, 0),
1040 TEST_RO_MEMSLOT(guest_ld_preidx, 0, 0),
1041 TEST_RO_MEMSLOT(guest_at, 0, 0),
1042 TEST_RO_MEMSLOT(guest_exec, 0, 0),
1043 TEST_RO_MEMSLOT(guest_write64, mmio_on_test_gpa_handler, 1),
1044 TEST_RO_MEMSLOT_NO_SYNDROME(guest_dc_zva),
1045 TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas),
1046 TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx),
1049 * The PT and data regions are both read-only and marked
1050 * for dirty logging at the same time. The expected result is that
1051 * for writes there should be no write in the dirty log. The
1052 * readonly handling is the same as if the memslot was not marked
1053 * for dirty logging: writes with a syndrome result in an MMIO
1054 * exit, and writes with no syndrome result in a failed vcpu run.
1056 TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_read64, 0, 0,
1057 guest_check_no_write_in_dirty_log),
1058 TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_ld_preidx, 0, 0,
1059 guest_check_no_write_in_dirty_log),
1060 TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_at, 0, 0,
1061 guest_check_no_write_in_dirty_log),
1062 TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_exec, 0, 0,
1063 guest_check_no_write_in_dirty_log),
1064 TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_write64, mmio_on_test_gpa_handler,
1065 1, guest_check_no_write_in_dirty_log),
1066 TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_dc_zva,
1067 guest_check_no_write_in_dirty_log),
1068 TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_cas,
1069 guest_check_no_write_in_dirty_log),
1070 TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_st_preidx,
1071 guest_check_no_write_in_dirty_log),
1074 * The PT and data regions are both read-only and punched with
1075 * holes tracked with userfaultfd. The expected result is the
1076 * union of both userfaultfd and read-only behaviors. For example,
1077 * write accesses result in a userfaultfd write fault and an MMIO
1078 * exit. Writes with no syndrome result in a failed vcpu run and
1079 * no userfaultfd write fault. Reads result in userfaultfd getting
1082 TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, uffd_data_handler, 2),
1083 TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, uffd_data_handler, 2),
1084 TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, uffd_no_handler, 1),
1085 TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, uffd_data_handler, 2),
1086 TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1,
1087 uffd_data_handler, 2),
1088 TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, uffd_data_handler, 2),
1089 TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, uffd_no_handler, 1),
1090 TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, uffd_no_handler, 1),
/*
 * Walk tests[] until the first entry without a name and run each test, via
 * for_each_guest_mode(), with the given backing source type.
 */
1095 static void for_each_test_and_guest_mode(enum vm_mem_backing_src_type src_type)
1097 struct test_desc *t;
1099 for (t = &tests[0]; t->name; t++) {
/* Parameters handed to run_test() through its void *arg. */
1103 struct test_params p = {
1104 .src_type = src_type,
1108 for_each_guest_mode(run_test, &p);
/*
 * Parse the command line ("-h", "-m <guest mode>", "-s <backing source>") and
 * run every test with the selected backing source, DEFAULT_VM_MEM_SRC unless
 * overridden.
 */
1112 int main(int argc, char *argv[])
1114 enum vm_mem_backing_src_type src_type;
1117 src_type = DEFAULT_VM_MEM_SRC;
1119 while ((opt = getopt(argc, argv, "hm:s:")) != -1) {
1122 guest_modes_cmdline(optarg);
1125 src_type = parse_backing_src_type(optarg);
1134 for_each_test_and_guest_mode(src_type);