/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>
#define CREATE_TRACE_POINTS
#include "trace-s390.h"
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */

#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))
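/*
 * Sizing note (an assumption drawn from how the buffer is used by
 * KVM_S390_{GET,SET}_IRQ_STATE below): the worst case is one pending
 * SIGP-style interrupt per possible sender VCPU plus the fixed set of
 * LOCAL_IRQS per-CPU interrupt types.
 */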
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ NULL }
};
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
	0xffe6fffbfcfdfc40UL,
	0x205c800000000000UL,
};

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
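/*
 * The mask above is ANDed with the host's STFLE facility bits when a VM
 * is created (see kvm_arch_init_vm), so only facilities that are both
 * present on the host and whitelisted here can ever be reported to a
 * guest.
 */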
static struct gmap_notifier gmap_notifier;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_ipte_notifier(&gmap_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_ipte_notifier(&gmap_notifier);
}

int kvm_arch_init(void *opaque)
{
	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_MAX_VCPUS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	default:
		r = 0;
	}
	return r;
}
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;
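	/*
	 * mmap_sem is taken for reading because
	 * gmap_test_and_clear_dirty() below walks the host page tables
	 * for every guest page of the slot.
	 */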
	down_read(&gmap->mm->mmap_sem);
	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (gmap_test_and_clear_dirty(address, gmap))
			mark_page_dirty(kvm, cur_gfn);
	}
	up_read(&gmap->mm->mmap_sem);
}
/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	memslot = id_to_memslot(kvm->memslots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac->mask, 129);
			set_kvm_facility(kvm->arch.model.fac->list, 129);
			r = 0;
		} else
			r = -EINVAL;
		break;
	case KVM_CAP_S390_USER_STSI:
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (new_limit > kvm->arch.gmap->asce_end)
			return -E2BIG;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			/* gmap_alloc will round the limit up */
			struct gmap *new = gmap_alloc(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_free(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *cur_vcpu;
	unsigned int vcpu_idx;
	u64 host_tod, gtod;
	int r;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	r = store_tod_clock(&host_tod);
	if (r)
		return r;

	mutex_lock(&kvm->lock);
	kvm->arch.epoch = gtod - host_tod;
	kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) {
		cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		exit_sie(cur_vcpu);
	}
	mutex_unlock(&kvm->lock);
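	/*
	 * Sketch of the arithmetic: if the host TOD currently reads
	 * 0x1000 and userspace asks for a guest TOD of 0x1800, the
	 * stored epoch delta is 0x800. SIE adds that delta to the host
	 * clock for guest clock reads, so every VCPU observes the
	 * requested time.
	 */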
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 host_tod, gtod;
	int r;

	r = store_tod_clock(&host_tod);
	if (r)
		return r;

	gtod = host_tod + kvm->arch.epoch;
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (atomic_read(&kvm->online_vcpus)) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
		       sizeof(struct cpuid));
		kvm->arch.model.ibc = proc->ibc;
		memcpy(kvm->arch.model.fac->list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp_get_ibc();
	memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	unsigned long curkey;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			goto out;
		}

		curkey = get_guest_storage_key(current->mm, hva);
		if (IS_ERR_VALUE(curkey)) {
			r = curkey;
			goto out;
		}
		keys[i] = curkey;
	}

	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
			 sizeof(uint8_t) * args->count);
	if (r)
		r = -EFAULT;
out:
	kvfree(keys);
	return r;
}
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			goto out;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			goto out;
		}

		r = set_guest_storage_key(current->mm, hva,
					  (unsigned long)keys[i], 0);
		if (r)
			goto out;
	}
out:
	kvfree(keys);
	return r;
}
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			kvm_set_irq_routing(kvm, &routing, 0, 0);
			r = 0;
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(2) && test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}

static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}
static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
{
	get_cpu_id(cpu_id);
	cpu_id->version = 0xff;
}

static int kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return 0;

	kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
					 GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.crypto.crycb)
		return -ENOMEM;

	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
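	/*
	 * Fresh random wrapping keys are generated per VM, so protected
	 * key material wrapped inside one guest is useless to any other
	 * guest or to the host.
	 */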
	return 0;
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset = (sca_offset + 16) & 0x7f0;
	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);
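	/*
	 * The staggering above cycles the SCA origin through 16-byte
	 * aligned offsets (0x0 to 0x7f0) within the zeroed page,
	 * presumably so that the SCAs of concurrently created VMs do
	 * not all start at the same cache-colored address (an
	 * assumption; the mask bounds the offset to the low 2 KiB).
	 */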
	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	/*
	 * The architectural maximum amount of facilities is 16 kbit. To store
	 * this amount, 2 kbyte of memory is required. Thus we need a full
	 * page to hold the guest facility list (arch.model.fac->list) and the
	 * facility mask (arch.model.fac->mask). Its address size has to be
	 * 31 bits and word aligned.
	 */
	kvm->arch.model.fac =
		(struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.model.fac)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac->mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
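	/*
	 * Sizing recap for the comment above: 16 kbit of architected
	 * facility bits take 2 KiB, so the mask (2 KiB) plus the list
	 * (2 KiB) fill exactly the one 4 KiB page allocated by
	 * get_zeroed_page().
	 */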
	kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
	kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff;

	if (kvm_s390_crypto_init(kvm) < 0)
		goto out_err;

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "%s", "vm created");

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
	} else {
		kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);

	return 0;
out_err:
	kfree(kvm->arch.crypto.crycb);
	free_page((unsigned long)kvm->arch.model.fac);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)(kvm->arch.sca));
	return rc;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		clear_bit(63 - vcpu->vcpu_id,
			  (unsigned long *) &vcpu->kvm->arch.sca->mcn);
		if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
		    (__u64) vcpu->arch.sie_block)
			vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	smp_mb();

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_free(vcpu->arch.gmap);

	if (kvm_s390_cmma_enabled(vcpu->kvm))
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	free_page((unsigned long)kvm->arch.model.fac);
	free_page((unsigned long)(kvm->arch.sca));
	debug_unregister(kvm->arch.dbf);
	kfree(kvm->arch.crypto.crycb);
	if (!kvm_is_ucontrol(kvm))
		gmap_free(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	if (test_kvm_facility(vcpu->kvm, 129))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
	if (test_kvm_facility(vcpu->kvm, 129))
		save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
	else
		save_fp_regs(vcpu->arch.host_fpregs.fprs);
	save_access_regs(vcpu->arch.host_acrs);
	if (test_kvm_facility(vcpu->kvm, 129)) {
		restore_fp_ctl(&vcpu->run->s.regs.fpc);
		restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
	} else {
		restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
		restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
	}
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.gmap);
	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	gmap_disable(vcpu->arch.gmap);
	if (test_kvm_facility(vcpu->kvm, 129)) {
		save_fp_ctl(&vcpu->run->s.regs.fpc);
		save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
	} else {
		save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
		save_fp_regs(vcpu->arch.guest_fpregs.fprs);
	}
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
	if (test_kvm_facility(vcpu->kvm, 129))
		restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
	else
		restore_fp_regs(vcpu->arch.host_fpregs.fprs);
	restore_access_regs(vcpu->arch.host_acrs);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	vcpu->arch.sie_block->cputm = 0UL;
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	vcpu->arch.guest_fpregs.fpc = 0;
	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
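/*
 * Note on the nonzero values in the reset above: CR0 (0xE0) and CR14
 * (0xC2000000) appear to be the architected initial control-register
 * contents defined by the Principles of Operation for an initial CPU
 * reset (an assumption based on the comment in the function); all
 * other guest state simply comes up zeroed.
 */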
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm))
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;
	return 0;
}
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.cpu_id = model->cpu_id;
	vcpu->arch.sie_block->ibc = model->ibc;
	vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
}

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED |
						    CPUSTAT_GED);
	kvm_s390_vcpu_setup_model(vcpu);

	vcpu->arch.sie_block->ecb = 6;
	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	vcpu->arch.sie_block->ecb2 = 8;
	vcpu->arch.sie_block->eca = 0xC1002000U;
	if (sclp_has_siif())
		vcpu->arch.sie_block->eca |= 1;
	if (sclp_has_sigpif())
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	}
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (kvm_s390_cmma_enabled(vcpu->kvm)) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (id >= KVM_MAX_VCPUS)
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
	vcpu->arch.host_vregs = &sie_page->vregs;

	vcpu->arch.sie_block->icpua = id;
	if (!kvm_is_ucontrol(kvm)) {
		if (!kvm->arch.sca) {
			WARN_ON_ONCE(1);
			goto out_free_cpu;
		}
		if (!kvm->arch.sca->cpu[id].sda)
			kvm->arch.sca->cpu[id].sda =
				(__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh =
			(__u32)(((__u64)kvm->arch.sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
		set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
	}

	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

void s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}
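/*
 * Protocol note for exit_sie() above: CPUSTAT_STOP_INT makes SIE exit
 * at the next interruptible point, and PROG_IN_SIE in prog0c is
 * maintained by the SIE entry/exit path itself, so the busy-wait only
 * returns once the VCPU has really left SIE.
 */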
/* Kick a guest cpu out of SIE and prevent SIE-reentry */
void exit_sie_sync(struct kvm_vcpu *vcpu)
{
	s390_vcpu_block(vcpu);
	exit_sie(vcpu);
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
{
	int i;
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			exit_sie_sync(vcpu);
		}
	}
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(vcpu->arch.sie_block->cputm,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}
bool kvm_s390_cmma_enabled(struct kvm *kvm)
{
	if (!MACHINE_IS_LPAR)
		return false;
	/* only enable for z10 and later */
	if (!MACHINE_HAS_EDAT1)
		return false;
	if (!kvm->arch.use_cmma)
		return false;
	return true;
}

static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	s390_vcpu_unblock(vcpu);
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_ipte_notify(vcpu->arch.gmap,
				      kvm_s390_get_prefix(vcpu),
				      PAGE_SIZE * 2);
		if (rc)
			return rc;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_set_mask(CPUSTAT_IBS,
					&vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_clear_mask(CPUSTAT_IBS,
					  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

	return 0;
}
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}

static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly, but we still
	 * want check_async_completion to clean up.
	 */
	return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	u8 opcode;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
	if (rc)
		return kvm_s390_inject_prog_cond(vcpu, rc);
	psw->addr = __rewind_psw(*psw, -insn_length(opcode));
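	/*
	 * Passing a negative length to __rewind_psw() steps the PSW
	 * forward over the faulting instruction, so the injected
	 * addressing exception behaves as suppressing even though the
	 * original DAT exception was nullifying.
	 */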

	return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	int rc = -1;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	if (exit_reason >= 0) {
		rc = 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		rc = -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu)) {
			rc = 0;
		} else {
			gpa_t gpa = current->thread.gmap_addr;
			rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
		}
	}

	if (rc == -1)
		rc = vcpu_post_run_fault_in_sie(vcpu);

	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);

	if (rc == 0) {
		if (kvm_is_ucontrol(vcpu->kvm))
			/* Don't exit for host interrupts. */
			rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
		else
			rc = kvm_handle_sie_intercept(vcpu);
	}

	return rc;
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		preempt_disable();
		kvm_guest_enter();
		preempt_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		kvm_guest_exit();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	kvm_run->kvm_dirty_regs = 0;
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EOPNOTSUPP) {
		/* intercept cannot be handled in-kernel, prepare kvm-run */
		kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* intercept was handled, but userspace support is needed;
		 * kvm_run has been prepared by the handler */
		rc = 0;
	}

	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	unsigned int px;
	u64 clkcomp;
	int rc;

	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = SAVE_AREA_BASE;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
	}
	rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
			     vcpu->arch.guest_fpregs.fprs, 128);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
			      &vcpu->arch.sie_block->gpsw, 16);
	px = kvm_s390_get_prefix(vcpu);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
			      &px, 4);
	rc |= write_guest_abs(vcpu,
			      gpa + offsetof(struct save_area, fp_ctrl_reg),
			      &vcpu->arch.guest_fpregs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
			      &vcpu->arch.sie_block->todpr, 4);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
			      &vcpu->arch.sie_block->cputm, 8);
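	/*
	 * The save-area image of the clock comparator holds only its
	 * high-order bits, hence the shift right by 8 below (an
	 * assumption about the architected store-status format).
	 */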
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

/*
 * store additional status at address
 */
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
					unsigned long gpa)
{
	/* Only bits 0-53 are used for address formation */
	if (!(gpa & ~0x3ff))
		return 0;

	return write_guest_abs(vcpu, gpa & ~0x3ff,
			       (void *)&vcpu->run->s.regs.vrs, 512);
}

int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	if (!test_kvm_facility(vcpu->kvm, 129))
		return 0;

	/*
	 * The guest vector registers are in the host VXRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * them into the save area.
	 */
	save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);

	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
}
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
	exit_sie_sync(vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
	exit_sie_sync(vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
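	/*
	 * A positive return value from read_guest()/write_guest() is a
	 * program interruption code; if the caller asked for it via
	 * KVM_S390_MEMOP_F_INJECT_EXCEPTION, deliver that exception to
	 * the guest below instead of merely reporting it to userspace.
	 */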
	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/*
	 * A few sanity checks. Memory slots have to start and end at a
	 * segment boundary (1 MB). The memory in userland may be
	 * fragmented into various different vmas. It is okay to mmap()
	 * and munmap() stuff in this slot after doing this call at any
	 * time.
	 */
	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   struct kvm_userspace_memory_region *mem,
				   const struct kvm_memory_slot *old,
				   enum kvm_mr_change change)
{
	int rc;

	/*
	 * If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
}
static int __init kvm_s390_init(void)
{
	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");