/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/etr.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
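
/*
 * Every entry below pairs a debugfs file name with the offset of a
 * counter in struct kvm_vcpu (via VCPU_STAT() above). Generic KVM code
 * walks this table and exposes each counter under /sys/kernel/debug/kvm/,
 * summed over all vcpus of all VMs. Usage sketch (from a shell,
 * illustration only):
 *
 *	# cat /sys/kernel/debug/kvm/exit_instruction
 */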
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = {
	0xffe6000000000000UL,
	0x005e000000000000UL,
};

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
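
/*
 * Facility bits are numbered from the most significant bit of byte 0 of
 * the facility list downwards, so bit nr lives in byte nr / 8 under the
 * mask 0x80 >> (nr & 7) - the same MSB-first convention used for the PLO
 * subfunction bitmap in kvm_s390_cpu_feat_init() below. A minimal sketch
 * of that mapping (illustration only; the real test_facility() lives in
 * asm/facility.h):
 */
static inline int __kvm_s390_fac_bit_set(const u8 *fac_list, unsigned long nr)
{
	return (fac_list[nr >> 3] & (0x80 >> (nr & 7))) != 0;
}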
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				vcpu->arch.vsie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc = 3; /* subfunction not available */

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
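
/*
 * User space consumes the feature and subfunction bitmaps collected above
 * through the KVM_S390_VM_CPU_MODEL attribute group (see
 * kvm_s390_get_machine_feat() and friends below). Usage sketch from a VMM,
 * assuming an open VM file descriptor vm_fd (illustration only):
 *
 *	struct kvm_s390_vm_cpu_feat feat;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_MACHINE_FEAT,
 *		.addr  = (__u64) &feat,
 *	};
 *
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 */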
int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	default:
		r = 0;
	}
	return r;
}
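
/*
 * Usage sketch (from a VMM, illustration only): query how many vcpus can
 * be created before actually creating any:
 *
 *	int max = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
 */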
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
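
/*
 * The dirty log follows the generic KVM convention: one bit per guest
 * page, relative to the memslot base. Usage sketch from a VMM during
 * migration, assuming slot 0 and a preallocated bitmap (illustration
 * only):
 *
 *	struct kvm_dirty_log log = {
 *		.slot = 0,
 *		.dirty_bitmap = bitmap,
 *	};
 *
 *	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 */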
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
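
/*
 * Usage sketch for the storage key interface (from a VMM, illustration
 * only): read the keys of the first 256 guest pages in one call:
 *
 *	uint8_t keys[256];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 256,
 *		.skeydata_addr = (__u64) keys,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */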
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}
static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}

static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac_mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_MAX_SIZE;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}

static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
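
/*
 * Note on the switch protocol above: all vcpus are blocked and kicked out
 * of SIE before the SCA is replaced, and sca_lock is taken for writing,
 * so no CPU can enter SIE with a half-switched SCA and readers such as
 * sca_add_vcpu()/sca_del_vcpu() always see either the old or the new
 * block, never a mix.
 */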
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}
/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
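
/*
 * Summary of the scheme above: while the vcpu is loaded and not idle,
 * cputm_start holds the TOD value at which host-side accounting started;
 * readers subtract the elapsed TOD delta from the SIE block's cpu timer
 * to report a value that keeps stepping even outside of SIE. The
 * seqcount lets other threads read a consistent (cputm, cputm_start)
 * pair without taking a lock on the vcpu thread's hot path.
 */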
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	/* Save host register state */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;

	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.enabled_gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);

	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;

	/* Restore host register state */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;

	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;
	return 0;
}

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= 0x02;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= 0x04;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
		vcpu->arch.sie_block->ecb2 |= 0x08;
	vcpu->arch.sie_block->eca = 0x1002000U;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= 0x80000000U;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= 0x40000000U;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->arch.sie_block->ecb3 |= 0x01;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	}
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
	if (test_kvm_facility(vcpu->kvm, 74))
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
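
/*
 * The request/kick protocol: kvm_s390_sync_request() first records the
 * request, then sets PROG_REQUEST and forces the vcpu out of SIE via
 * exit_sie(). The SIE entry path refuses to (re-)enter the guest while a
 * prog20 flag is set, so the request is guaranteed to be seen by
 * kvm_s390_handle_requests() before the next guest run.
 */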
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	int i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure the new values will be lazily loaded */
	save_fpu_regs();
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	current->thread.fpu.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
	else
		memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
	else
		memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
	fpu->fpc = current->thread.fpu.fpc;
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;
	if (!sclp.has_gpere)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc)
			return rc;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

	return 0;
}
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}

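/*
 * Note: the epoch is the signed offset SIE adds to the host TOD clock to
 * form the guest view, i.e. guest_tod = host_tod + epoch. Setting
 * tod == get_tod_clock(), for example, yields an epoch of 0, so the guest
 * sees the host clock unmodified.
 */
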
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}

static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}

static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}

static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}

static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, &opcode, 1);
	/* the two leftmost opcode bits encode the length: 2, 4 or 6 bytes */
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}

static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}

static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when
	 * running the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit there should be no uaccess.
		 */
		local_irq_disable();
		__kvm_guest_enter();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		__kvm_guest_exit();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}

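/*
 * Illustrative userspace counterpart (sketch, not part of this file;
 * "vcpu_fd" and the mmap'ed struct kvm_run "run" are assumed to be set up
 * as for any KVM port): the run loop keeps re-entering KVM_RUN until an
 * exit reason needs handling:
 *
 *	for (;;) {
 *		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0 && errno != EINTR)
 *			err(1, "KVM_RUN");
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			break;
 *	}
 *
 * with the SIE intercept data then read from run->s390_sieic.
 */
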
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	kvm_run->kvm_dirty_regs = 0;
}

static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
}

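/*
 * Illustrative sketch: userspace marks which of the synced registers it
 * changed via kvm_run->kvm_dirty_regs before the next KVM_RUN, e.g. to
 * move the prefix area ("run" is the mmap'ed struct kvm_run):
 *
 *	run->s.regs.prefix = new_prefix;
 *	run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
 */
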
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

/*
 * store additional status at address
 */
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
					unsigned long gpa)
{
	/* Only bits 0-53 are used for address formation */
	if (!(gpa & ~0x3ff))
		return 0;

	return write_guest_abs(vcpu, gpa & ~0x3ff,
			       (void *)&vcpu->run->s.regs.vrs, 512);
}

int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	if (!test_kvm_facility(vcpu->kvm, 129))
		return 0;

	/*
	 * The guest VXRS are in the host VXRS due to the lazy
	 * copying in vcpu load/put. We can simply call save_fpu_regs()
	 * to save the current register state because we are in the
	 * middle of a load/put cycle.
	 *
	 * Let's update our copies before we save it into the save area.
	 */
	save_fpu_regs();

	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
}

static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	/* drop a pending DISABLE request before queueing the ENABLE */
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

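/*
 * Illustrative userspace sketch for KVM_S390_MEM_OP ("vcpu_fd" assumed):
 * reading 256 bytes from guest logical address 0x1000 through access
 * register 0:
 *
 *	__u8 buf[256];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x1000,
 *		.size = sizeof(buf),
 *		.op = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf = (__u64)(unsigned long)buf,
 *		.ar = 0,
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		err(1, "KVM_S390_MEM_OP");
 */
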
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

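/*
 * Illustrative sketch for the irq state interface ("vcpu_fd" assumed):
 * the buffer is an array of struct kvm_s390_irq, so len must be a
 * multiple of that size and, for KVM_S390_SET_IRQ_STATE, at most
 * VCPU_IRQS_MAX_BUF:
 *
 *	struct kvm_s390_irq irqs[16];
 *	struct kvm_s390_irq_state st = {
 *		.buf = (__u64)(unsigned long)irqs,
 *		.len = sizeof(irqs),
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &st) < 0)
 *		err(1, "KVM_S390_GET_IRQ_STATE");
 */
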
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end on a
	   segment boundary (1MB). The backing memory in userland may be
	   fragmented into multiple different vmas, and it is okay to mmap()
	   and munmap() within this slot at any time after this call. */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}

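/*
 * Illustrative userspace sketch ("vm_fd" and a 1MB-aligned mapping "mem"
 * are assumed): a memslot that satisfies the checks above starts and ends
 * on a segment boundary:
 *
 *	struct kvm_userspace_memory_region reg = {
 *		.slot = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size = 256 << 20,
 *		.userspace_addr = (__u64)(unsigned long)mem,
 *	};
 *	if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &reg) < 0)
 *		err(1, "KVM_SET_USER_MEMORY_REGION");
 */
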
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
}

static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

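/*
 * Worked example: sclp.hmfai packs sixteen 2-bit fields, one per 64-bit
 * doubleword of the facility list. nonhyp_mask(i) extracts field i; a
 * field value of 0, 1, 2 or 3 keeps the trailing 48, 32, 16 or 0 facility
 * bits of that doubleword (0x0000ffffffffffffUL >> 0/16/32/48).
 */
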
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");