1 // SPDX-License-Identifier: GPL-2.0
3 * hosting IBM Z kernel virtual machines (s390x)
5 * Copyright IBM Corp. 2008, 2017
7 * Author(s): Carsten Otte <cotte@de.ibm.com>
8 * Christian Borntraeger <borntraeger@de.ibm.com>
9 * Heiko Carstens <heiko.carstens@de.ibm.com>
10 * Christian Ehrhardt <ehrhardt@de.ibm.com>
11 * Jason J. Herne <jjherne@us.ibm.com>
14 #include <linux/compiler.h>
15 #include <linux/err.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
35 #include <asm/pgtable.h>
38 #include <asm/switch_to.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
46 #define KMSG_COMPONENT "kvm-s390"
48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
50 #define CREATE_TRACE_POINTS
52 #include "trace-s390.h"
54 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57 (KVM_MAX_VCPUS + LOCAL_IRQS))
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
61 struct kvm_stats_debugfs_item debugfs_entries[] = {
62 { "userspace_handled", VCPU_STAT(exit_userspace) },
63 { "exit_null", VCPU_STAT(exit_null) },
64 { "exit_validity", VCPU_STAT(exit_validity) },
65 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
66 { "exit_external_request", VCPU_STAT(exit_external_request) },
67 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
68 { "exit_instruction", VCPU_STAT(exit_instruction) },
69 { "exit_pei", VCPU_STAT(exit_pei) },
70 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
71 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
72 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
73 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
74 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
75 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
76 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
77 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
78 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
79 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
80 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
81 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
82 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
83 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
84 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
85 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
86 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
87 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
88 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
89 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
90 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
91 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
92 { "instruction_spx", VCPU_STAT(instruction_spx) },
93 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
94 { "instruction_stap", VCPU_STAT(instruction_stap) },
95 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
96 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
97 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
98 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
99 { "instruction_essa", VCPU_STAT(instruction_essa) },
100 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
101 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
102 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
103 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
104 { "instruction_sie", VCPU_STAT(instruction_sie) },
105 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
106 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
107 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
108 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
109 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
110 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
111 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
112 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
113 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
114 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
115 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
116 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
117 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
118 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
119 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
120 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
121 { "diagnose_10", VCPU_STAT(diagnose_10) },
122 { "diagnose_44", VCPU_STAT(diagnose_44) },
123 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
124 { "diagnose_258", VCPU_STAT(diagnose_258) },
125 { "diagnose_308", VCPU_STAT(diagnose_308) },
126 { "diagnose_500", VCPU_STAT(diagnose_500) },
130 struct kvm_s390_tod_clock_ext {
136 /* allow nested virtualization in KVM (if enabled by user space) */
138 module_param(nested, int, S_IRUGO);
139 MODULE_PARM_DESC(nested, "Nested virtualization support");
141 /* upper facilities limit for kvm */
142 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
144 unsigned long kvm_s390_fac_list_mask_size(void)
146 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
147 return ARRAY_SIZE(kvm_s390_fac_list_mask);
150 /* available cpu features supported by kvm */
151 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
152 /* available subfunctions indicated via query / "test bit" */
153 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
155 static struct gmap_notifier gmap_notifier;
156 static struct gmap_notifier vsie_gmap_notifier;
157 debug_info_t *kvm_s390_dbf;
159 /* Section: not file related */
160 int kvm_arch_hardware_enable(void)
162 /* every s390 is virtualization enabled ;-) */
166 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
170 * This callback is executed during stop_machine(). All CPUs are therefore
171 * temporarily stopped. In order not to change guest behavior, we have to
172 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
173 * so a CPU won't be stopped while calculating with the epoch.
175 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
179 struct kvm_vcpu *vcpu;
181 unsigned long long *delta = v;
183 list_for_each_entry(kvm, &vm_list, vm_list) {
184 kvm->arch.epoch -= *delta;
185 kvm_for_each_vcpu(i, vcpu, kvm) {
186 vcpu->arch.sie_block->epoch -= *delta;
187 if (vcpu->arch.cputm_enabled)
188 vcpu->arch.cputm_start += *delta;
189 if (vcpu->arch.vsie_block)
190 vcpu->arch.vsie_block->epoch -= *delta;
196 static struct notifier_block kvm_clock_notifier = {
197 .notifier_call = kvm_clock_sync,
200 int kvm_arch_hardware_setup(void)
202 gmap_notifier.notifier_call = kvm_gmap_notifier;
203 gmap_register_pte_notifier(&gmap_notifier);
204 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
205 gmap_register_pte_notifier(&vsie_gmap_notifier);
206 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
207 &kvm_clock_notifier);
211 void kvm_arch_hardware_unsetup(void)
213 gmap_unregister_pte_notifier(&gmap_notifier);
214 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
215 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
216 &kvm_clock_notifier);
219 static void allow_cpu_feat(unsigned long nr)
221 set_bit_inv(nr, kvm_s390_available_cpu_feat);
224 static inline int plo_test_bit(unsigned char nr)
226 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
230 /* Parameter registers are ignored for "test bit" */
240 static void kvm_s390_cpu_feat_init(void)
244 for (i = 0; i < 256; ++i) {
246 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
249 if (test_facility(28)) /* TOD-clock steering */
250 ptff(kvm_s390_available_subfunc.ptff,
251 sizeof(kvm_s390_available_subfunc.ptff),
254 if (test_facility(17)) { /* MSA */
255 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
256 kvm_s390_available_subfunc.kmac);
257 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
258 kvm_s390_available_subfunc.kmc);
259 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
260 kvm_s390_available_subfunc.km);
261 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
262 kvm_s390_available_subfunc.kimd);
263 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
264 kvm_s390_available_subfunc.klmd);
266 if (test_facility(76)) /* MSA3 */
267 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
268 kvm_s390_available_subfunc.pckmo);
269 if (test_facility(77)) { /* MSA4 */
270 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
271 kvm_s390_available_subfunc.kmctr);
272 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
273 kvm_s390_available_subfunc.kmf);
274 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
275 kvm_s390_available_subfunc.kmo);
276 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
277 kvm_s390_available_subfunc.pcc);
279 if (test_facility(57)) /* MSA5 */
280 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
281 kvm_s390_available_subfunc.ppno);
283 if (test_facility(146)) /* MSA8 */
284 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
285 kvm_s390_available_subfunc.kma);
287 if (MACHINE_HAS_ESOP)
288 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
290 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
291 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
293 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
294 !test_facility(3) || !nested)
296 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
297 if (sclp.has_64bscao)
298 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
300 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
302 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
304 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
306 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
308 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
310 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
312 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
314 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
315 * all skey handling functions read/set the skey from the PGSTE
316 * instead of the real storage key.
318 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
319 * pages being detected as preserved although they are resident.
321 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
322 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
324 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
325 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
326 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
328 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
329 * cannot easily shadow the SCA because of the ipte lock.
333 int kvm_arch_init(void *opaque)
335 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
339 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
340 debug_unregister(kvm_s390_dbf);
344 kvm_s390_cpu_feat_init();
346 /* Register floating interrupt controller interface. */
347 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
350 void kvm_arch_exit(void)
352 debug_unregister(kvm_s390_dbf);
355 /* Section: device related */
356 long kvm_arch_dev_ioctl(struct file *filp,
357 unsigned int ioctl, unsigned long arg)
359 if (ioctl == KVM_S390_ENABLE_SIE)
360 return s390_enable_sie();
364 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
369 case KVM_CAP_S390_PSW:
370 case KVM_CAP_S390_GMAP:
371 case KVM_CAP_SYNC_MMU:
372 #ifdef CONFIG_KVM_S390_UCONTROL
373 case KVM_CAP_S390_UCONTROL:
375 case KVM_CAP_ASYNC_PF:
376 case KVM_CAP_SYNC_REGS:
377 case KVM_CAP_ONE_REG:
378 case KVM_CAP_ENABLE_CAP:
379 case KVM_CAP_S390_CSS_SUPPORT:
380 case KVM_CAP_IOEVENTFD:
381 case KVM_CAP_DEVICE_CTRL:
382 case KVM_CAP_ENABLE_CAP_VM:
383 case KVM_CAP_S390_IRQCHIP:
384 case KVM_CAP_VM_ATTRIBUTES:
385 case KVM_CAP_MP_STATE:
386 case KVM_CAP_IMMEDIATE_EXIT:
387 case KVM_CAP_S390_INJECT_IRQ:
388 case KVM_CAP_S390_USER_SIGP:
389 case KVM_CAP_S390_USER_STSI:
390 case KVM_CAP_S390_SKEYS:
391 case KVM_CAP_S390_IRQ_STATE:
392 case KVM_CAP_S390_USER_INSTR0:
393 case KVM_CAP_S390_CMMA_MIGRATION:
394 case KVM_CAP_S390_AIS:
395 case KVM_CAP_S390_AIS_MIGRATION:
398 case KVM_CAP_S390_MEM_OP:
401 case KVM_CAP_NR_VCPUS:
402 case KVM_CAP_MAX_VCPUS:
403 r = KVM_S390_BSCA_CPU_SLOTS;
404 if (!kvm_s390_use_sca_entries())
406 else if (sclp.has_esca && sclp.has_64bscao)
407 r = KVM_S390_ESCA_CPU_SLOTS;
409 case KVM_CAP_NR_MEMSLOTS:
410 r = KVM_USER_MEM_SLOTS;
412 case KVM_CAP_S390_COW:
413 r = MACHINE_HAS_ESOP;
415 case KVM_CAP_S390_VECTOR_REGISTERS:
418 case KVM_CAP_S390_RI:
419 r = test_facility(64);
421 case KVM_CAP_S390_GS:
422 r = test_facility(133);
430 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
431 struct kvm_memory_slot *memslot)
433 gfn_t cur_gfn, last_gfn;
434 unsigned long address;
435 struct gmap *gmap = kvm->arch.gmap;
437 /* Loop over all guest pages */
438 last_gfn = memslot->base_gfn + memslot->npages;
439 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
440 address = gfn_to_hva_memslot(memslot, cur_gfn);
442 if (test_and_clear_guest_dirty(gmap->mm, address))
443 mark_page_dirty(kvm, cur_gfn);
444 if (fatal_signal_pending(current))
450 /* Section: vm related */
451 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
454 * Get (and clear) the dirty memory log for a memory slot.
456 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
457 struct kvm_dirty_log *log)
461 struct kvm_memslots *slots;
462 struct kvm_memory_slot *memslot;
465 if (kvm_is_ucontrol(kvm))
468 mutex_lock(&kvm->slots_lock);
471 if (log->slot >= KVM_USER_MEM_SLOTS)
474 slots = kvm_memslots(kvm);
475 memslot = id_to_memslot(slots, log->slot);
477 if (!memslot->dirty_bitmap)
480 kvm_s390_sync_dirty_log(kvm, memslot);
481 r = kvm_get_dirty_log(kvm, log, &is_dirty);
485 /* Clear the dirty log */
487 n = kvm_dirty_bitmap_bytes(memslot);
488 memset(memslot->dirty_bitmap, 0, n);
492 mutex_unlock(&kvm->slots_lock);
496 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
499 struct kvm_vcpu *vcpu;
501 kvm_for_each_vcpu(i, vcpu, kvm) {
502 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
/*
 * Enable a VM-wide capability requested by user space through @cap.
 *
 * The simple switches (IRQCHIP, USER_SIGP, USER_STSI, USER_INSTR0) just set
 * a flag in kvm->arch.  The facility-backed capabilities (vector registers,
 * RI, AIS, GS) take kvm->lock and only add the facility bits to the model's
 * fac_mask/fac_list while kvm->created_vcpus is still zero.
 */
506 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
514 case KVM_CAP_S390_IRQCHIP:
515 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
516 kvm->arch.use_irqchip = 1;
519 case KVM_CAP_S390_USER_SIGP:
520 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
521 kvm->arch.user_sigp = 1;
524 case KVM_CAP_S390_VECTOR_REGISTERS:
525 mutex_lock(&kvm->lock);
526 if (kvm->created_vcpus) {
528 } else if (MACHINE_HAS_VX) {
529 set_kvm_facility(kvm->arch.model.fac_mask, 129);
530 set_kvm_facility(kvm->arch.model.fac_list, 129);
531 if (test_facility(134)) {
532 set_kvm_facility(kvm->arch.model.fac_mask, 134);
533 set_kvm_facility(kvm->arch.model.fac_list, 134);
535 if (test_facility(135)) {
536 set_kvm_facility(kvm->arch.model.fac_mask, 135);
537 set_kvm_facility(kvm->arch.model.fac_list, 135);
542 mutex_unlock(&kvm->lock);
543 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
544 r ? "(not available)" : "(success)");
546 case KVM_CAP_S390_RI:
548 mutex_lock(&kvm->lock);
549 if (kvm->created_vcpus) {
551 } else if (test_facility(64)) {
552 set_kvm_facility(kvm->arch.model.fac_mask, 64);
553 set_kvm_facility(kvm->arch.model.fac_list, 64);
556 mutex_unlock(&kvm->lock);
557 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
558 r ? "(not available)" : "(success)");
560 case KVM_CAP_S390_AIS:
561 mutex_lock(&kvm->lock);
562 if (kvm->created_vcpus) {
565 set_kvm_facility(kvm->arch.model.fac_mask, 72);
566 set_kvm_facility(kvm->arch.model.fac_list, 72);
569 mutex_unlock(&kvm->lock);
570 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
571 r ? "(not available)" : "(success)");
573 case KVM_CAP_S390_GS:
575 mutex_lock(&kvm->lock);
/*
 * Check created_vcpus like the other facility cases in this switch.
 * online_vcpus is only raised once a VCPU has been fully set up, so it
 * can still read zero while a VCPU creation is already in flight.
 */
576 if (kvm->created_vcpus) {
578 } else if (test_facility(133)) {
579 set_kvm_facility(kvm->arch.model.fac_mask, 133);
580 set_kvm_facility(kvm->arch.model.fac_list, 133);
583 mutex_unlock(&kvm->lock);
584 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
585 r ? "(not available)" : "(success)");
587 case KVM_CAP_S390_USER_STSI:
588 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
589 kvm->arch.user_stsi = 1;
592 case KVM_CAP_S390_USER_INSTR0:
593 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
594 kvm->arch.user_instr0 = 1;
595 icpt_operexc_on_all_vcpus(kvm);
605 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
609 switch (attr->attr) {
610 case KVM_S390_VM_MEM_LIMIT_SIZE:
612 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
613 kvm->arch.mem_limit);
614 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
624 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
628 switch (attr->attr) {
629 case KVM_S390_VM_MEM_ENABLE_CMMA:
635 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
636 mutex_lock(&kvm->lock);
637 if (!kvm->created_vcpus) {
638 kvm->arch.use_cmma = 1;
641 mutex_unlock(&kvm->lock);
643 case KVM_S390_VM_MEM_CLR_CMMA:
648 if (!kvm->arch.use_cmma)
651 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
652 mutex_lock(&kvm->lock);
653 idx = srcu_read_lock(&kvm->srcu);
654 s390_reset_cmma(kvm->arch.gmap->mm);
655 srcu_read_unlock(&kvm->srcu, idx);
656 mutex_unlock(&kvm->lock);
659 case KVM_S390_VM_MEM_LIMIT_SIZE: {
660 unsigned long new_limit;
662 if (kvm_is_ucontrol(kvm))
665 if (get_user(new_limit, (u64 __user *)attr->addr))
668 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
669 new_limit > kvm->arch.mem_limit)
675 /* gmap_create takes last usable address */
676 if (new_limit != KVM_S390_NO_MEM_LIMIT)
680 mutex_lock(&kvm->lock);
681 if (!kvm->created_vcpus) {
682 /* gmap_create will round the limit up */
683 struct gmap *new = gmap_create(current->mm, new_limit);
688 gmap_remove(kvm->arch.gmap);
690 kvm->arch.gmap = new;
694 mutex_unlock(&kvm->lock);
695 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
696 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
697 (void *) kvm->arch.gmap->asce);
707 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
709 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
711 struct kvm_vcpu *vcpu;
714 if (!test_kvm_facility(kvm, 76))
717 mutex_lock(&kvm->lock);
718 switch (attr->attr) {
719 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
721 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
722 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
723 kvm->arch.crypto.aes_kw = 1;
724 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
726 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
728 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
729 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
730 kvm->arch.crypto.dea_kw = 1;
731 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
733 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
734 kvm->arch.crypto.aes_kw = 0;
735 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
736 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
737 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
739 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
740 kvm->arch.crypto.dea_kw = 0;
741 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
742 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
743 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
746 mutex_unlock(&kvm->lock);
750 kvm_for_each_vcpu(i, vcpu, kvm) {
751 kvm_s390_vcpu_crypto_setup(vcpu);
754 mutex_unlock(&kvm->lock);
758 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
761 struct kvm_vcpu *vcpu;
763 kvm_for_each_vcpu(cx, vcpu, kvm)
764 kvm_s390_sync_request(req, vcpu);
768 * Must be called with kvm->srcu held to avoid races on memslots, and with
769 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
771 static int kvm_s390_vm_start_migration(struct kvm *kvm)
773 struct kvm_s390_migration_state *mgs;
774 struct kvm_memory_slot *ms;
775 /* should be the only one */
776 struct kvm_memslots *slots;
777 unsigned long ram_pages;
780 /* migration mode already enabled */
781 if (kvm->arch.migration_state)
784 slots = kvm_memslots(kvm);
785 if (!slots || !slots->used_slots)
788 mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
791 kvm->arch.migration_state = mgs;
793 if (kvm->arch.use_cmma) {
795 * Get the first slot. They are reverse sorted by base_gfn, so
796 * the first slot is also the one at the end of the address
797 * space. We have verified above that at least one slot is
800 ms = slots->memslots;
801 /* round up so we only use full longs */
802 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
803 /* allocate enough bytes to store all the bits */
804 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
805 if (!mgs->pgste_bitmap) {
807 kvm->arch.migration_state = NULL;
811 mgs->bitmap_size = ram_pages;
812 atomic64_set(&mgs->dirty_pages, ram_pages);
813 /* mark all the pages in active slots as dirty */
814 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
815 ms = slots->memslots + slotnr;
816 bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
819 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
825 * Must be called with kvm->slots_lock to avoid races with ourselves and
826 * kvm_s390_vm_start_migration.
828 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
830 struct kvm_s390_migration_state *mgs;
832 /* migration mode already disabled */
833 if (!kvm->arch.migration_state)
835 mgs = kvm->arch.migration_state;
836 kvm->arch.migration_state = NULL;
838 if (kvm->arch.use_cmma) {
839 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
840 /* We have to wait for the essa emulation to finish */
841 synchronize_srcu(&kvm->srcu);
842 vfree(mgs->pgste_bitmap);
848 static int kvm_s390_vm_set_migration(struct kvm *kvm,
849 struct kvm_device_attr *attr)
853 mutex_lock(&kvm->slots_lock);
854 switch (attr->attr) {
855 case KVM_S390_VM_MIGRATION_START:
856 res = kvm_s390_vm_start_migration(kvm);
858 case KVM_S390_VM_MIGRATION_STOP:
859 res = kvm_s390_vm_stop_migration(kvm);
864 mutex_unlock(&kvm->slots_lock);
869 static int kvm_s390_vm_get_migration(struct kvm *kvm,
870 struct kvm_device_attr *attr)
872 u64 mig = (kvm->arch.migration_state != NULL);
874 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
877 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
/*
 * Set the guest TOD clock from a struct kvm_s390_vm_tod_clock copied in from
 * the user buffer at attr->addr.  When test_kvm_facility(kvm, 139) holds the
 * whole structure goes to kvm_s390_set_tod_clock_ext(); otherwise only a
 * request with epoch_idx == 0 is forwarded to kvm_s390_set_tod_clock().
 *
 * The address-of operators on gtod had been mangled to ">od" (a decoded
 * "&gt" entity); they are restored here so the calls receive &gtod again.
 */
882 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
884 struct kvm_s390_vm_tod_clock gtod;
886 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
889 if (test_kvm_facility(kvm, 139))
890 kvm_s390_set_tod_clock_ext(kvm, &gtod);
891 else if (gtod.epoch_idx == 0)
892 kvm_s390_set_tod_clock(kvm, gtod.tod);
896 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
897 gtod.epoch_idx, gtod.tod);
/*
 * Read the TOD extension value from the user buffer at attr->addr into
 * gtod_high and log it.
 * NOTE(review): validation of the value is not visible in this listing.
 *
 * The copy destination had been mangled to ">od_high"; it is restored to
 * &gtod_high.
 */
902 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
906 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
912 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
/*
 * Copy the TOD base value from the user buffer at attr->addr and install it
 * with kvm_s390_set_tod_clock(), then log the new value.
 *
 * The copy destination had been mangled to ">od"; it is restored to &gtod.
 */
917 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
921 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
924 kvm_s390_set_tod_clock(kvm, gtod);
925 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
929 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
936 switch (attr->attr) {
937 case KVM_S390_VM_TOD_EXT:
938 ret = kvm_s390_set_tod_ext(kvm, attr);
940 case KVM_S390_VM_TOD_HIGH:
941 ret = kvm_s390_set_tod_high(kvm, attr);
943 case KVM_S390_VM_TOD_LOW:
944 ret = kvm_s390_set_tod_low(kvm, attr);
953 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
954 struct kvm_s390_vm_tod_clock *gtod)
956 struct kvm_s390_tod_clock_ext htod;
960 get_tod_clock_ext((char *)&htod);
962 gtod->tod = htod.tod + kvm->arch.epoch;
963 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
965 if (gtod->tod < htod.tod)
966 gtod->epoch_idx += 1;
/*
 * Report the guest TOD clock as a struct kvm_s390_vm_tod_clock written to the
 * user buffer at attr->addr.  The structure is zero-filled first so that no
 * uninitialised bytes are copied out.  With test_kvm_facility(kvm, 139) it is
 * filled by kvm_s390_get_tod_clock_ext(); gtod.tod may also be taken from
 * kvm_s390_get_tod_clock_fast().
 *
 * The address-of operators on gtod had been mangled to ">od"; they are
 * restored to &gtod.
 */
971 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
973 struct kvm_s390_vm_tod_clock gtod;
975 memset(&gtod, 0, sizeof(gtod));
977 if (test_kvm_facility(kvm, 139))
978 kvm_s390_get_tod_clock_ext(kvm, &gtod);
980 gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
982 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
985 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
986 gtod.epoch_idx, gtod.tod);
/*
 * Copy gtod_high to the user buffer at attr->addr and log the value.
 *
 * The copy source had been mangled to ">od_high"; it is restored to
 * &gtod_high.
 */
990 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
994 if (copy_to_user((void __user *)attr->addr, &gtod_high,
997 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
/*
 * Read the guest TOD base via kvm_s390_get_tod_clock_fast(), copy it to the
 * user buffer at attr->addr and log it.
 *
 * The copy source had been mangled to ">od"; it is restored to &gtod.
 */
1002 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1006 gtod = kvm_s390_get_tod_clock_fast(kvm);
1007 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1009 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1014 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1021 switch (attr->attr) {
1022 case KVM_S390_VM_TOD_EXT:
1023 ret = kvm_s390_get_tod_ext(kvm, attr);
1025 case KVM_S390_VM_TOD_HIGH:
1026 ret = kvm_s390_get_tod_high(kvm, attr);
1028 case KVM_S390_VM_TOD_LOW:
1029 ret = kvm_s390_get_tod_low(kvm, attr);
1038 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1040 struct kvm_s390_vm_cpu_processor *proc;
1041 u16 lowest_ibc, unblocked_ibc;
1044 mutex_lock(&kvm->lock);
1045 if (kvm->created_vcpus) {
1049 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1054 if (!copy_from_user(proc, (void __user *)attr->addr,
1056 kvm->arch.model.cpuid = proc->cpuid;
1057 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1058 unblocked_ibc = sclp.ibc & 0xfff;
1059 if (lowest_ibc && proc->ibc) {
1060 if (proc->ibc > unblocked_ibc)
1061 kvm->arch.model.ibc = unblocked_ibc;
1062 else if (proc->ibc < lowest_ibc)
1063 kvm->arch.model.ibc = lowest_ibc;
1065 kvm->arch.model.ibc = proc->ibc;
1067 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1068 S390_ARCH_FAC_LIST_SIZE_BYTE);
1069 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1070 kvm->arch.model.ibc,
1071 kvm->arch.model.cpuid);
1072 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1073 kvm->arch.model.fac_list[0],
1074 kvm->arch.model.fac_list[1],
1075 kvm->arch.model.fac_list[2]);
1080 mutex_unlock(&kvm->lock);
/*
 * Install the CPU feature bitmap supplied by user space at attr->addr.
 * The requested bits must be a subset of kvm_s390_available_cpu_feat
 * (checked with bitmap_subset()); the copy into kvm->arch.cpu_feat is done
 * under kvm->lock and only while no VCPU has been created.
 */
1084 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1085 struct kvm_device_attr *attr)
1087 struct kvm_s390_vm_cpu_feat data;
1090 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1092 if (!bitmap_subset((unsigned long *) data.feat,
1093 kvm_s390_available_cpu_feat,
1094 KVM_S390_VM_CPU_FEAT_NR_BITS))
1097 mutex_lock(&kvm->lock);
/*
 * Use created_vcpus, as the other "before any VCPU exists" checks in this
 * file do (e.g. kvm_s390_set_processor); online_vcpus is only raised once
 * a VCPU has been fully set up and can lag behind a creation in flight.
 */
1098 if (!kvm->created_vcpus) {
1099 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1100 KVM_S390_VM_CPU_FEAT_NR_BITS);
1103 mutex_unlock(&kvm->lock);
1107 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1108 struct kvm_device_attr *attr)
1111 * Once supported by kernel + hw, we have to store the subfunctions
1112 * in kvm->arch and remember that user space configured them.
1117 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1121 switch (attr->attr) {
1122 case KVM_S390_VM_CPU_PROCESSOR:
1123 ret = kvm_s390_set_processor(kvm, attr);
1125 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1126 ret = kvm_s390_set_processor_feat(kvm, attr);
1128 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1129 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1135 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1137 struct kvm_s390_vm_cpu_processor *proc;
1140 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1145 proc->cpuid = kvm->arch.model.cpuid;
1146 proc->ibc = kvm->arch.model.ibc;
1147 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1148 S390_ARCH_FAC_LIST_SIZE_BYTE);
1149 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1150 kvm->arch.model.ibc,
1151 kvm->arch.model.cpuid);
1152 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1153 kvm->arch.model.fac_list[0],
1154 kvm->arch.model.fac_list[1],
1155 kvm->arch.model.fac_list[2]);
1156 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
/*
 * Report the host machine model to user space: a zero-initialised
 * struct kvm_s390_vm_cpu_machine is filled with the CPU id from get_cpu_id(),
 * sclp.ibc, the VM's facility mask and the facility list taken from
 * S390_lowcore.stfle_fac_list, and then copied to attr->addr.
 */
1163 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1165 struct kvm_s390_vm_cpu_machine *mach;
1168 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1173 get_cpu_id((struct cpuid *) &mach->cpuid);
1174 mach->ibc = sclp.ibc;
1175 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1176 S390_ARCH_FAC_LIST_SIZE_BYTE);
1177 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1178 sizeof(S390_lowcore.stfle_fac_list));
/*
 * NOTE(review): this event is labelled "host ibc / host cpuid" but prints
 * kvm->arch.model.ibc and kvm->arch.model.cpuid rather than the mach->ibc
 * and mach->cpuid values gathered above - confirm whether that is intended.
 */
1179 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1180 kvm->arch.model.ibc,
1181 kvm->arch.model.cpuid);
1182 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1186 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1190 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1197 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1198 struct kvm_device_attr *attr)
1200 struct kvm_s390_vm_cpu_feat data;
1202 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1203 KVM_S390_VM_CPU_FEAT_NR_BITS);
1204 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1209 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1210 struct kvm_device_attr *attr)
1212 struct kvm_s390_vm_cpu_feat data;
1214 bitmap_copy((unsigned long *) data.feat,
1215 kvm_s390_available_cpu_feat,
1216 KVM_S390_VM_CPU_FEAT_NR_BITS);
1217 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1222 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1223 struct kvm_device_attr *attr)
1226 * Once we can actually configure subfunctions (kernel + hw support),
1227 * we have to check if they were already set by user space, if so copy
1228 * them from kvm->arch.
1233 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1234 struct kvm_device_attr *attr)
1236 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1237 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1241 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1245 switch (attr->attr) {
1246 case KVM_S390_VM_CPU_PROCESSOR:
1247 ret = kvm_s390_get_processor(kvm, attr);
1249 case KVM_S390_VM_CPU_MACHINE:
1250 ret = kvm_s390_get_machine(kvm, attr);
1252 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1253 ret = kvm_s390_get_processor_feat(kvm, attr);
1255 case KVM_S390_VM_CPU_MACHINE_FEAT:
1256 ret = kvm_s390_get_machine_feat(kvm, attr);
1258 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1259 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1261 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1262 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1268 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1272 switch (attr->group) {
1273 case KVM_S390_VM_MEM_CTRL:
1274 ret = kvm_s390_set_mem_control(kvm, attr);
1276 case KVM_S390_VM_TOD:
1277 ret = kvm_s390_set_tod(kvm, attr);
1279 case KVM_S390_VM_CPU_MODEL:
1280 ret = kvm_s390_set_cpu_model(kvm, attr);
1282 case KVM_S390_VM_CRYPTO:
1283 ret = kvm_s390_vm_set_crypto(kvm, attr);
1285 case KVM_S390_VM_MIGRATION:
1286 ret = kvm_s390_vm_set_migration(kvm, attr);
/*
 * kvm_s390_vm_get_attr - dispatch a VM-level "get attribute" request.
 *
 * Routes by attr->group to the memory-control, TOD, CPU-model or migration
 * getter. Unlike the setter, no KVM_S390_VM_CRYPTO case is handled here.
 */
1296 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1300 switch (attr->group) {
1301 case KVM_S390_VM_MEM_CTRL:
1302 ret = kvm_s390_get_mem_control(kvm, attr);
1304 case KVM_S390_VM_TOD:
1305 ret = kvm_s390_get_tod(kvm, attr);
1307 case KVM_S390_VM_CPU_MODEL:
1308 ret = kvm_s390_get_cpu_model(kvm, attr);
1310 case KVM_S390_VM_MIGRATION:
1311 ret = kvm_s390_vm_get_migration(kvm, attr);
/*
 * kvm_s390_vm_has_attr - answer whether a VM attribute is supported.
 *
 * Outer switch selects the attribute group, inner switches the individual
 * attribute. The two CMMA memory-control attributes are reported as
 * supported only when sclp.has_cmma is set (-ENXIO otherwise).
 */
1321 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1325 switch (attr->group) {
1326 case KVM_S390_VM_MEM_CTRL:
1327 switch (attr->attr) {
1328 case KVM_S390_VM_MEM_ENABLE_CMMA:
1329 case KVM_S390_VM_MEM_CLR_CMMA:
/* CMMA attributes exist only if the machine offers CMMA */
1330 ret = sclp.has_cmma ? 0 : -ENXIO;
1332 case KVM_S390_VM_MEM_LIMIT_SIZE:
1340 case KVM_S390_VM_TOD:
1341 switch (attr->attr) {
1342 case KVM_S390_VM_TOD_LOW:
1343 case KVM_S390_VM_TOD_HIGH:
1351 case KVM_S390_VM_CPU_MODEL:
1352 switch (attr->attr) {
1353 case KVM_S390_VM_CPU_PROCESSOR:
1354 case KVM_S390_VM_CPU_MACHINE:
1355 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1356 case KVM_S390_VM_CPU_MACHINE_FEAT:
1357 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1360 /* configuring subfunctions is not supported yet */
1361 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1367 case KVM_S390_VM_CRYPTO:
1368 switch (attr->attr) {
1369 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1370 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1371 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1372 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1380 case KVM_S390_VM_MIGRATION:
1391 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1395 int srcu_idx, i, r = 0;
1397 if (args->flags != 0)
1400 /* Is this guest using storage keys? */
1401 if (!mm_use_skey(current->mm))
1402 return KVM_S390_GET_SKEYS_NONE;
1404 /* Enforce sane limit on memory allocation */
1405 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1408 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1412 down_read(¤t->mm->mmap_sem);
1413 srcu_idx = srcu_read_lock(&kvm->srcu);
1414 for (i = 0; i < args->count; i++) {
1415 hva = gfn_to_hva(kvm, args->start_gfn + i);
1416 if (kvm_is_error_hva(hva)) {
1421 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1425 srcu_read_unlock(&kvm->srcu, srcu_idx);
1426 up_read(¤t->mm->mmap_sem);
1429 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1430 sizeof(uint8_t) * args->count);
1439 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1443 int srcu_idx, i, r = 0;
1445 if (args->flags != 0)
1448 /* Enforce sane limit on memory allocation */
1449 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1452 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1456 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1457 sizeof(uint8_t) * args->count);
1463 /* Enable storage key handling for the guest */
1464 r = s390_enable_skey();
1468 down_read(¤t->mm->mmap_sem);
1469 srcu_idx = srcu_read_lock(&kvm->srcu);
1470 for (i = 0; i < args->count; i++) {
1471 hva = gfn_to_hva(kvm, args->start_gfn + i);
1472 if (kvm_is_error_hva(hva)) {
1477 /* Lowest order bit is reserved */
1478 if (keys[i] & 0x01) {
1483 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1487 srcu_read_unlock(&kvm->srcu, srcu_idx);
1488 up_read(¤t->mm->mmap_sem);
/*
 * Tuning constants for the CMMA migration interface.
 * KVM_S390_MAX_BIT_DISTANCE: the dirty-bit gap (two pointer sizes) above
 * which kvm_s390_get_cmma_bits() stops extending a block.
 * KVM_S390_CMMA_SIZE_MAX: upper bound for one CMMA transfer, deliberately
 * the same limit as for storage keys.
 */
1495 * Base address and length must be sent at the start of each block, therefore
1496 * it's cheaper to send some clean data, as long as it's less than the size of
1499 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1500 /* for consistency */
1501 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
/*
 * kvm_s390_get_cmma_bits - collect CMMA attribute bytes for migration.
 * @kvm:  VM to query; kvm->arch.migration_state holds the dirty bitmap
 * @args: in: start_gfn, count, flags (only KVM_S390_CMMA_PEEK is accepted);
 *        out: start_gfn of the returned block, remaining dirty pages, and
 *        the values written to the user buffer at args->values
 */
1504 * This function searches for the next page with dirty CMMA attributes, and
1505 * saves the attributes in the buffer up to either the end of the buffer or
1506 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1507 * no trailing clean bytes are saved.
1508 * In case no dirty bits were found, or if CMMA was not enabled or used, the
1509 * output buffer will indicate 0 as length.
1511 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1512 struct kvm_s390_cmma_log *args)
1514 struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1515 unsigned long bufsize, hva, pgstev, i, next, cur;
1516 int srcu_idx, peek, r = 0, rr;
1519 cur = args->start_gfn;
1520 i = next = pgstev = 0;
1522 if (unlikely(!kvm->arch.use_cmma))
1524 /* Invalid/unsupported flags were specified */
1525 if (args->flags & ~KVM_S390_CMMA_PEEK)
1527 /* Migration mode query, and we are not doing a migration */
1528 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1531 /* CMMA is disabled or was not used, or the buffer has length zero */
/* the transfer size is capped at KVM_S390_CMMA_SIZE_MAX */
1532 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1533 if (!bufsize || !kvm->mm->context.use_cmma) {
1534 memset(args, 0, sizeof(*args));
1539 /* We are not peeking, and there are no dirty pages */
1540 if (!atomic64_read(&s->dirty_pages)) {
1541 memset(args, 0, sizeof(*args));
1544 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1546 if (cur >= s->bitmap_size) /* nothing found, loop back */
1547 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1548 if (cur >= s->bitmap_size) { /* again! (very unlikely) */
1549 memset(args, 0, sizeof(*args));
1552 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
/* kernel-side bounce buffer, one byte per page */
1555 res = vmalloc(bufsize);
1559 args->start_gfn = cur;
1561 down_read(&kvm->mm->mmap_sem);
1562 srcu_idx = srcu_read_lock(&kvm->srcu);
1563 while (i < bufsize) {
1564 hva = gfn_to_hva(kvm, cur);
1565 if (kvm_is_error_hva(hva)) {
1569 /* decrement only if we actually flipped the bit to 0 */
1570 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1571 atomic64_dec(&s->dirty_pages);
1572 r = get_pgste(kvm->mm, hva, &pgstev);
1575 /* save the value */
/* only bits 0x43 of the PGSTE value shifted right by 24 are reported */
1576 res[i++] = (pgstev >> 24) & 0x43;
1578 * if the next bit is too far away, stop.
1579 * if we reached the previous "next", find the next one
1582 if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1585 next = find_next_bit(s->pgste_bitmap,
1586 s->bitmap_size, cur + 1);
1587 /* reached the end of the bitmap or of the buffer, stop */
1588 if ((next >= s->bitmap_size) ||
1589 (next >= args->start_gfn + bufsize))
1594 srcu_read_unlock(&kvm->srcu, srcu_idx);
1595 up_read(&kvm->mm->mmap_sem);
/* s may be NULL (no migration state); then nothing remains dirty */
1597 args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1599 rr = copy_to_user((void __user *)args->values, res, args->count);
/*
 * kvm_s390_set_cmma_bits - apply CMMA attribute bytes supplied by user space.
 * @kvm:  target VM; must have kvm->arch.use_cmma set
 * @args: start_gfn, count (at most KVM_S390_CMMA_SIZE_MAX), flags (must be
 *        0) and the user buffer args->values with one byte per page
 */
1608 * This function sets the CMMA attributes for the given pages. If the input
1609 * buffer has zero length, no action is taken, otherwise the attributes are
1610 * set and the mm->context.use_cmma flag is set.
1612 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1613 const struct kvm_s390_cmma_log *args)
1615 unsigned long hva, mask, pgstev, i;
1617 int srcu_idx, r = 0;
1621 if (!kvm->arch.use_cmma)
1623 /* invalid/unsupported flags */
1624 if (args->flags != 0)
1626 /* Enforce sane limit on memory allocation */
1627 if (args->count > KVM_S390_CMMA_SIZE_MAX)
1630 if (args->count == 0)
1633 bits = vmalloc(sizeof(*bits) * args->count);
1637 r = copy_from_user(bits, (void __user *)args->values, args->count);
1643 down_read(&kvm->mm->mmap_sem);
1644 srcu_idx = srcu_read_lock(&kvm->srcu);
1645 for (i = 0; i < args->count; i++) {
1646 hva = gfn_to_hva(kvm, args->start_gfn + i);
1647 if (kvm_is_error_hva(hva)) {
/* move the value to the PGSTE bit position (mirrors the >> 24 on read) */
1653 pgstev = pgstev << 24;
/* only the usage-state and NODAT bits may be changed */
1654 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1655 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1657 srcu_read_unlock(&kvm->srcu, srcu_idx);
1658 up_read(&kvm->mm->mmap_sem);
/* flag the mm as using CMMA; the flag is written under mmap_sem for write */
1660 if (!kvm->mm->context.use_cmma) {
1661 down_write(&kvm->mm->mmap_sem);
1662 kvm->mm->context.use_cmma = 1;
1663 up_write(&kvm->mm->mmap_sem);
/*
 * kvm_arch_vm_ioctl - s390 handler for VM-scoped ioctls.
 * @filp:  file whose private_data is the struct kvm
 * @ioctl: ioctl number
 * @arg:   user pointer to the ioctl-specific argument structure
 *
 * Each case copies its argument structure from user space and forwards it
 * to the matching helper; the helper's result is stored in r.
 */
1670 long kvm_arch_vm_ioctl(struct file *filp,
1671 unsigned int ioctl, unsigned long arg)
1673 struct kvm *kvm = filp->private_data;
1674 void __user *argp = (void __user *)arg;
1675 struct kvm_device_attr attr;
/* inject a floating (VM-wide) interrupt */
1679 case KVM_S390_INTERRUPT: {
1680 struct kvm_s390_interrupt s390int;
1683 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1685 r = kvm_s390_inject_vm(kvm, &s390int);
1688 case KVM_ENABLE_CAP: {
1689 struct kvm_enable_cap cap;
1691 if (copy_from_user(&cap, argp, sizeof(cap)))
1693 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1696 case KVM_CREATE_IRQCHIP: {
1697 struct kvm_irq_routing_entry routing;
/* only acted upon when the irqchip capability was enabled for this VM */
1700 if (kvm->arch.use_irqchip) {
1701 /* Set up dummy routing. */
1702 memset(&routing, 0, sizeof(routing));
1703 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1707 case KVM_SET_DEVICE_ATTR: {
1709 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1711 r = kvm_s390_vm_set_attr(kvm, &attr);
1714 case KVM_GET_DEVICE_ATTR: {
1716 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1718 r = kvm_s390_vm_get_attr(kvm, &attr);
1721 case KVM_HAS_DEVICE_ATTR: {
1723 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1725 r = kvm_s390_vm_has_attr(kvm, &attr);
1728 case KVM_S390_GET_SKEYS: {
1729 struct kvm_s390_skeys args;
1732 if (copy_from_user(&args, argp,
1733 sizeof(struct kvm_s390_skeys)))
1735 r = kvm_s390_get_skeys(kvm, &args);
1738 case KVM_S390_SET_SKEYS: {
1739 struct kvm_s390_skeys args;
1742 if (copy_from_user(&args, argp,
1743 sizeof(struct kvm_s390_skeys)))
1745 r = kvm_s390_set_skeys(kvm, &args);
1748 case KVM_S390_GET_CMMA_BITS: {
1749 struct kvm_s390_cmma_log args;
1752 if (copy_from_user(&args, argp, sizeof(args)))
/* CMMA access is serialized with kvm->slots_lock */
1754 mutex_lock(&kvm->slots_lock);
1755 r = kvm_s390_get_cmma_bits(kvm, &args);
1756 mutex_unlock(&kvm->slots_lock);
/* args is also an output: write the updated header back */
1758 r = copy_to_user(argp, &args, sizeof(args));
1764 case KVM_S390_SET_CMMA_BITS: {
1765 struct kvm_s390_cmma_log args;
1768 if (copy_from_user(&args, argp, sizeof(args)))
1770 mutex_lock(&kvm->slots_lock);
1771 r = kvm_s390_set_cmma_bits(kvm, &args);
1772 mutex_unlock(&kvm->slots_lock);
/*
 * kvm_s390_query_ap_config - run PQAP(QCI) to read the AP configuration.
 * @config: output buffer; it is zeroed for 128 bytes first, so it must be at
 *          least that large
 *
 * The instruction is emitted as a raw opcode with function code 0x04000000.
 * General registers 0 and 2 and the condition code are clobbered.
 */
1782 static int kvm_s390_query_ap_config(u8 *config)
1784 u32 fcn_code = 0x04000000UL;
1787 memset(config, 0, 128);
1791 ".long 0xb2af0000\n" /* PQAP(QCI) */
1797 : "r" (fcn_code), "r" (config)
1798 : "cc", "0", "2", "memory"
/*
 * kvm_s390_apxa_installed - test for the APXA facility.
 *
 * Only queried when facility 12 is present. The answer is bit 0x40 of the
 * first byte returned by PQAP(QCI); a failing query is logged with its
 * condition code.
 */
1804 static int kvm_s390_apxa_installed(void)
1809 if (test_facility(12)) {
1810 cc = kvm_s390_query_ap_config(config);
1813 pr_err("PQAP(QCI) failed with cc=%d", cc);
1815 return config[0] & 0x40;
/*
 * kvm_s390_set_crycb_format - build the crypto control block designator.
 *
 * The designator is the CRYCB address truncated to 32 bits, with the format
 * bits OR-ed in: CRYCB_FORMAT2 when APXA is installed, else CRYCB_FORMAT1.
 */
1821 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1823 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1825 if (kvm_s390_apxa_installed())
1826 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1828 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
/*
 * kvm_s390_get_initial_cpuid - default CPU id for a new VM.
 *
 * Returns the local cpuid structure reinterpreted as a u64, with its
 * version field forced to 0xff.
 */
1831 static u64 kvm_s390_get_initial_cpuid(void)
1836 cpuid.version = 0xff;
1837 return *((u64 *) &cpuid);
/*
 * kvm_s390_crypto_init - set up per-VM crypto state.
 *
 * Depends on facility 76. Points the CRYCB at the copy embedded in
 * sie_page2, computes its designator, enables AES and DEA key wrapping and
 * fills both wrapping key masks with random bytes.
 */
1840 static void kvm_s390_crypto_init(struct kvm *kvm)
1842 if (!test_kvm_facility(kvm, 76))
1845 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1846 kvm_s390_set_crycb_format(kvm);
1848 /* Enable AES/DEA protected key functions by default */
1849 kvm->arch.crypto.aes_kw = 1;
1850 kvm->arch.crypto.dea_kw = 1;
1851 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1852 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1853 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1854 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
/*
 * sca_dispose - free the VM's system control area.
 *
 * An extended SCA is released with free_pages_exact(), a basic SCA with
 * free_page(). The pointer is cleared afterwards.
 */
1857 static void sca_dispose(struct kvm *kvm)
1859 if (kvm->arch.use_esca)
1860 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1862 free_page((unsigned long)(kvm->arch.sca));
1863 kvm->arch.sca = NULL;
/*
 * kvm_arch_init_vm - architecture-specific part of VM creation.
 * @kvm:  VM being created
 * @type: VM type flags; only KVM_VM_S390_UCONTROL is recognised, and only
 *        with CONFIG_KVM_S390_UCONTROL and CAP_SYS_ADMIN
 *
 * Enables SIE for the process, allocates the basic SCA, registers the debug
 * feature, allocates sie_page2, seeds the facility mask/list and the CPU
 * model, initialises crypto and floating-interrupt state and, for
 * non-ucontrol VMs, creates the guest address space (gmap).
 */
1866 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1868 gfp_t alloc_flags = GFP_KERNEL;
1870 char debug_name[16];
/* static: shared by all VMs, protected by kvm_lock below */
1871 static unsigned long sca_offset;
1874 #ifdef CONFIG_KVM_S390_UCONTROL
1875 if (type & ~KVM_VM_S390_UCONTROL)
1877 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1884 rc = s390_enable_sie();
1890 kvm->arch.use_esca = 0; /* start with basic SCA */
/* without 64-bit SCA origin support the SCA is taken from GFP_DMA memory */
1891 if (!sclp.has_64bscao)
1892 alloc_flags |= GFP_DMA;
1893 rwlock_init(&kvm->arch.sca_lock);
1894 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
/* place the basic SCA at a per-VM offset inside its page */
1897 spin_lock(&kvm_lock);
1899 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1901 kvm->arch.sca = (struct bsca_block *)
1902 ((char *) kvm->arch.sca + sca_offset);
1903 spin_unlock(&kvm_lock);
/* "kvm-" plus a 32-bit pid in decimal fits into debug_name[16] */
1905 sprintf(debug_name, "kvm-%u", current->pid);
1907 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1911 kvm->arch.sie_page2 =
1912 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1913 if (!kvm->arch.sie_page2)
1916 /* Populate the facility mask initially. */
1917 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1918 sizeof(S390_lowcore.stfle_fac_list));
/* keep only host facilities that KVM's mask allows; zero the rest */
1919 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1920 if (i < kvm_s390_fac_list_mask_size())
1921 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1923 kvm->arch.model.fac_mask[i] = 0UL;
1926 /* Populate the facility list initially. */
1927 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1928 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1929 S390_ARCH_FAC_LIST_SIZE_BYTE);
1931 /* we are always in czam mode - even on pre z14 machines */
1932 set_kvm_facility(kvm->arch.model.fac_mask, 138);
1933 set_kvm_facility(kvm->arch.model.fac_list, 138);
1934 /* we emulate STHYI in kvm */
1935 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1936 set_kvm_facility(kvm->arch.model.fac_list, 74);
1937 if (MACHINE_HAS_TLB_GUEST) {
1938 set_kvm_facility(kvm->arch.model.fac_mask, 147);
1939 set_kvm_facility(kvm->arch.model.fac_list, 147);
1942 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1943 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1945 kvm_s390_crypto_init(kvm);
1947 mutex_init(&kvm->arch.float_int.ais_lock);
1948 kvm->arch.float_int.simm = 0;
1949 kvm->arch.float_int.nimm = 0;
1950 spin_lock_init(&kvm->arch.float_int.lock);
1951 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1952 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1953 init_waitqueue_head(&kvm->arch.ipte_wq);
1954 mutex_init(&kvm->arch.ipte_mutex);
1956 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1957 VM_EVENT(kvm, 3, "vm created with type %lu", type);
/* ucontrol VMs get no VM-wide gmap and no memory limit */
1959 if (type & KVM_VM_S390_UCONTROL) {
1960 kvm->arch.gmap = NULL;
1961 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1963 if (sclp.hamax == U64_MAX)
1964 kvm->arch.mem_limit = TASK_SIZE_MAX;
1966 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1968 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1969 if (!kvm->arch.gmap)
1971 kvm->arch.gmap->private = kvm;
1972 kvm->arch.gmap->pfault_enabled = 0;
1975 kvm->arch.css_support = 0;
1976 kvm->arch.use_irqchip = 0;
1977 kvm->arch.epoch = 0;
1979 spin_lock_init(&kvm->arch.start_stop_lock);
1980 kvm_s390_vsie_init(kvm);
1981 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
/* error unwinding: release what was set up above */
1985 free_page((unsigned long)kvm->arch.sie_page2);
1986 debug_unregister(kvm->arch.dbf);
1988 KVM_EVENT(3, "creation of vm failed: %d", rc);
/*
 * kvm_arch_has_vcpu_debugfs - tell generic KVM whether s390 provides
 * per-vcpu debugfs entries. NOTE(review): return value to be confirmed.
 */
1992 bool kvm_arch_has_vcpu_debugfs(void)
/*
 * kvm_arch_create_vcpu_debugfs - architecture hook for creating per-vcpu
 * debugfs entries. NOTE(review): presumably a stub on s390; confirm.
 */
1997 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
/*
 * kvm_arch_vcpu_destroy - tear down one vcpu.
 *
 * Drops pending local interrupts and async-pf completions, removes the
 * private gmap of a ucontrol vcpu, releases the CMMA page if CMMA is in
 * use, and frees the SIE block page and the vcpu structure itself.
 */
2002 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2004 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2005 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2006 kvm_s390_clear_local_irqs(vcpu);
2007 kvm_clear_async_pf_completion_queue(vcpu);
2008 if (!kvm_is_ucontrol(vcpu->kvm))
/* ucontrol vcpus own their gmap (see __kvm_ucontrol_vcpu_init) */
2011 if (kvm_is_ucontrol(vcpu->kvm))
2012 gmap_remove(vcpu->arch.gmap);
2014 if (vcpu->kvm->arch.use_cmma)
2015 kvm_s390_vcpu_unsetup_cmma(vcpu);
2016 free_page((unsigned long)(vcpu->arch.sie_block));
2018 kvm_vcpu_uninit(vcpu);
2019 kmem_cache_free(kvm_vcpu_cache, vcpu);
/*
 * kvm_free_vcpus - destroy every vcpu of a VM.
 *
 * After destroying the vcpus, the vcpu array slots and the online count are
 * reset under kvm->lock.
 */
2022 static void kvm_free_vcpus(struct kvm *kvm)
2025 struct kvm_vcpu *vcpu;
2027 kvm_for_each_vcpu(i, vcpu, kvm)
2028 kvm_arch_vcpu_destroy(vcpu);
2030 mutex_lock(&kvm->lock);
2031 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2032 kvm->vcpus[i] = NULL;
2034 atomic_set(&kvm->online_vcpus, 0);
2035 mutex_unlock(&kvm->lock);
/*
 * kvm_arch_destroy_vm - architecture-specific part of VM destruction.
 *
 * Frees all vcpus, the debug feature, sie_page2, the VM-wide gmap
 * (non-ucontrol VMs only), adapters, floating interrupts, vsie state and any
 * migration state including its PGSTE bitmap.
 */
2038 void kvm_arch_destroy_vm(struct kvm *kvm)
2040 kvm_free_vcpus(kvm);
2042 debug_unregister(kvm->arch.dbf);
2043 free_page((unsigned long)kvm->arch.sie_page2);
2044 if (!kvm_is_ucontrol(kvm))
2045 gmap_remove(kvm->arch.gmap);
2046 kvm_s390_destroy_adapters(kvm);
2047 kvm_s390_clear_float_irqs(kvm);
2048 kvm_s390_vsie_destroy(kvm);
/* the bitmap came from vmalloc, the state struct from kmalloc */
2049 if (kvm->arch.migration_state) {
2050 vfree(kvm->arch.migration_state->pgste_bitmap);
2051 kfree(kvm->arch.migration_state);
2053 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2056 /* Section: vcpu related */
/*
 * __kvm_ucontrol_vcpu_init - give a ucontrol vcpu its own gmap.
 *
 * The gmap covers the full address range (limit -1UL) of the current mm and
 * points back to the owning VM through ->private.
 */
2057 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2059 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2060 if (!vcpu->arch.gmap)
2062 vcpu->arch.gmap->private = vcpu->kvm;
/*
 * sca_del_vcpu - remove a vcpu from the system control area.
 *
 * Does nothing when SCA entries are not in use. Otherwise, under the SCA
 * read lock, clears the vcpu's bit in the mcn mask and zeroes its entry's
 * sda, for the extended or basic layout as appropriate.
 */
2067 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2069 if (!kvm_s390_use_sca_entries())
2071 read_lock(&vcpu->kvm->arch.sca_lock);
2072 if (vcpu->kvm->arch.use_esca) {
2073 struct esca_block *sca = vcpu->kvm->arch.sca;
2075 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2076 sca->cpu[vcpu->vcpu_id].sda = 0;
2078 struct bsca_block *sca = vcpu->kvm->arch.sca;
2080 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2081 sca->cpu[vcpu->vcpu_id].sda = 0;
2083 read_unlock(&vcpu->kvm->arch.sca_lock);
/*
 * sca_add_vcpu - register a vcpu in the system control area.
 *
 * The SCA origin is always written into the SIE block (scaoh/scaol). When
 * SCA entries are in use, the vcpu's entry is pointed at its SIE block and
 * its bit in the mcn mask is set, all under the SCA read lock.
 */
2086 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2088 if (!kvm_s390_use_sca_entries()) {
2089 struct bsca_block *sca = vcpu->kvm->arch.sca;
2091 /* we still need the basic sca for the ipte control */
2092 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2093 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2095 read_lock(&vcpu->kvm->arch.sca_lock);
2096 if (vcpu->kvm->arch.use_esca) {
2097 struct esca_block *sca = vcpu->kvm->arch.sca;
2099 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2100 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
/* the low six bits of the extended SCA origin are masked off */
2101 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2102 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2103 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2105 struct bsca_block *sca = vcpu->kvm->arch.sca;
2107 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2108 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2109 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2110 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2112 read_unlock(&vcpu->kvm->arch.sca_lock);
2115 /* Basic SCA to Extended SCA data copy routines */
/*
 * sca_copy_entry - copy one basic SCA cpu entry @s into extended entry @d.
 * The sigp_ctrl fields c and scn are transferred individually.
 */
2116 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2119 d->sigp_ctrl.c = s->sigp_ctrl.c;
2120 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
/*
 * sca_copy_b_to_e - copy a whole basic SCA @s into extended SCA @d:
 * the ipte_control word plus all KVM_S390_BSCA_CPU_SLOTS cpu entries.
 */
2123 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2127 d->ipte_control = s->ipte_control;
2129 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2130 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
/*
 * sca_switch_to_extended - replace the VM's basic SCA by an extended one.
 *
 * Allocates a zeroed esca_block, then with all vcpus blocked and the SCA
 * write lock held copies the old contents, repoints every vcpu's SIE block
 * at the new SCA and flips kvm->arch.use_esca. The old page is freed after
 * the lock is dropped and the vcpus are unblocked.
 */
2133 static int sca_switch_to_extended(struct kvm *kvm)
2135 struct bsca_block *old_sca = kvm->arch.sca;
2136 struct esca_block *new_sca;
2137 struct kvm_vcpu *vcpu;
2138 unsigned int vcpu_idx;
2141 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
/* split the new origin into the two 32-bit SIE block fields */
2145 scaoh = (u32)((u64)(new_sca) >> 32);
2146 scaol = (u32)(u64)(new_sca) & ~0x3fU;
/* no vcpu may be inside SIE while the SCA is exchanged */
2148 kvm_s390_vcpu_block_all(kvm);
2149 write_lock(&kvm->arch.sca_lock);
2151 sca_copy_b_to_e(new_sca, old_sca);
2153 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2154 vcpu->arch.sie_block->scaoh = scaoh;
2155 vcpu->arch.sie_block->scaol = scaol;
2156 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2158 kvm->arch.sca = new_sca;
2159 kvm->arch.use_esca = 1;
2161 write_unlock(&kvm->arch.sca_lock);
2162 kvm_s390_vcpu_unblock_all(kvm);
2164 free_page((unsigned long)old_sca);
2166 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2167 old_sca, kvm->arch.sca);
/*
 * sca_can_add_vcpu - check whether vcpu id @id fits into the SCA.
 *
 * Ids below KVM_S390_BSCA_CPU_SLOTS fit the basic SCA. Larger ids need ESCA
 * and 64-bit SCA origin support; the VM is switched to the extended SCA on
 * demand under kvm->lock. The result is non-zero if the id is usable.
 */
2171 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2175 if (!kvm_s390_use_sca_entries()) {
2176 if (id < KVM_MAX_VCPUS)
2180 if (id < KVM_S390_BSCA_CPU_SLOTS)
2182 if (!sclp.has_esca || !sclp.has_64bscao)
2185 mutex_lock(&kvm->lock);
2186 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2187 mutex_unlock(&kvm->lock);
2189 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
/*
 * kvm_arch_vcpu_init - early per-vcpu initialisation.
 *
 * Invalidates the pfault token, clears the async-pf queue, sets the prefix
 * to 0 and advertises in run->kvm_valid_regs which register sets can be
 * synchronised (RICCB with facility 64, GSCB with facility 133, and either
 * VRS or FPRS). ucontrol vcpus additionally get their private gmap.
 */
2192 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2194 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2195 kvm_clear_async_pf_completion_queue(vcpu);
2196 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2202 kvm_s390_set_prefix(vcpu, 0);
2203 if (test_kvm_facility(vcpu->kvm, 64))
2204 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2205 if (test_kvm_facility(vcpu->kvm, 133))
2206 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2207 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2208 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2211 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2213 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2215 if (kvm_is_ucontrol(vcpu->kvm))
2216 return __kvm_ucontrol_vcpu_init(vcpu);
/*
 * __start_cpu_timer_accounting - record the TOD at which the guest CPU
 * timer starts running. The write is wrapped in the cputm seqcount so that
 * kvm_s390_get_cpu_timer() sees a consistent start/cputm pair.
 */
2221 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2222 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2224 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2225 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2226 vcpu->arch.cputm_start = get_tod_clock_fast();
2227 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
/*
 * __stop_cpu_timer_accounting - charge the elapsed TOD time since
 * cputm_start to the guest CPU timer and mark accounting as stopped
 * (cputm_start = 0), all inside the cputm seqcount write section.
 */
2230 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2231 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2233 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2234 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2235 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2236 vcpu->arch.cputm_start = 0;
2237 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
/*
 * __enable_cpu_timer_accounting - switch on software CPU timer accounting
 * for this vcpu and start it immediately. Warns if it was already enabled.
 */
2240 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2241 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2243 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2244 vcpu->arch.cputm_enabled = true;
2245 __start_cpu_timer_accounting(vcpu);
/*
 * __disable_cpu_timer_accounting - stop software CPU timer accounting for
 * this vcpu and switch it off. Warns if it was not enabled.
 */
2248 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2249 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2251 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2252 __stop_cpu_timer_accounting(vcpu);
2253 vcpu->arch.cputm_enabled = false;
/*
 * enable_cpu_timer_accounting - preemption-safe wrapper around
 * __enable_cpu_timer_accounting().
 */
2256 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2258 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2259 __enable_cpu_timer_accounting(vcpu);
/*
 * disable_cpu_timer_accounting - preemption-safe wrapper around
 * __disable_cpu_timer_accounting().
 */
2263 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2265 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2266 __disable_cpu_timer_accounting(vcpu);
/*
 * kvm_s390_set_cpu_timer - load a new guest CPU timer value.
 * @vcpu:  target vcpu
 * @cputm: new timer value
 *
 * When accounting is enabled, the accounting start is reset to "now" so the
 * new value is not charged for time that elapsed before the update.
 */
2270 /* set the cpu timer - may only be called from the VCPU thread itself */
2271 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2273 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2274 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2275 if (vcpu->arch.cputm_enabled)
2276 vcpu->arch.cputm_start = get_tod_clock_fast();
2277 vcpu->arch.sie_block->cputm = cputm;
2278 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
/*
 * kvm_s390_get_cpu_timer - read the current guest CPU timer value.
 *
 * Without software accounting the SIE block value is returned directly.
 * Otherwise the value is sampled under the cputm seqcount and corrected by
 * the TOD time elapsed since cputm_start, retrying if a writer interfered.
 */
2282 /* update and get the cpu timer - can also be called from other VCPU threads */
2283 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2288 if (unlikely(!vcpu->arch.cputm_enabled))
2289 return vcpu->arch.sie_block->cputm;
2291 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2293 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2295 * If the writer would ever execute a read in the critical
2296 * section, e.g. in irq context, we have a deadlock.
2298 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2299 value = vcpu->arch.sie_block->cputm;
2300 /* if cputm_start is 0, accounting is being started/stopped */
2301 if (likely(vcpu->arch.cputm_start))
2302 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
/* seq & ~1 forces a retry when the sample began inside a write section */
2303 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
/*
 * kvm_arch_vcpu_load - called when the vcpu is scheduled onto a host cpu.
 *
 * Re-enables the gmap that was active at the last put, marks the vcpu as
 * running and resumes CPU timer accounting unless the vcpu is idle.
 */
2308 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2311 gmap_enable(vcpu->arch.enabled_gmap);
2312 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2313 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2314 __start_cpu_timer_accounting(vcpu);
/*
 * kvm_arch_vcpu_put - called when the vcpu is scheduled off a host cpu.
 *
 * Mirror image of kvm_arch_vcpu_load(): pauses CPU timer accounting, clears
 * the running flag, and remembers and disables the currently enabled gmap.
 */
2318 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2321 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2322 __stop_cpu_timer_accounting(vcpu);
2323 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2324 vcpu->arch.enabled_gmap = gmap_get_enabled();
2325 gmap_disable(vcpu->arch.enabled_gmap);
/*
 * kvm_s390_vcpu_initial_reset - put a vcpu into its initial-reset state.
 *
 * Clears PSW, prefix, CPU timer, clock comparator, TOD programmable
 * register and control registers (CR0 and CR14 get their reset values),
 * resets fpc, gbea, pp and the pfault token, drops async-pf completions and
 * local interrupts, and stops the vcpu unless user space controls the cpu
 * state.
 */
2329 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2331 /* this equals initial cpu reset in pop, but we don't switch to ESA */
2332 vcpu->arch.sie_block->gpsw.mask = 0UL;
2333 vcpu->arch.sie_block->gpsw.addr = 0UL;
2334 kvm_s390_set_prefix(vcpu, 0);
2335 kvm_s390_set_cpu_timer(vcpu, 0);
2336 vcpu->arch.sie_block->ckc = 0UL;
2337 vcpu->arch.sie_block->todpr = 0;
2338 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2339 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
2340 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2341 /* make sure the new fpc will be lazily loaded */
2343 current->thread.fpu.fpc = 0;
2344 vcpu->arch.sie_block->gbea = 1;
2345 vcpu->arch.sie_block->pp = 0;
2346 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2347 kvm_clear_async_pf_completion_queue(vcpu);
2348 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2349 kvm_s390_vcpu_stop(vcpu);
2350 kvm_s390_clear_local_irqs(vcpu);
/*
 * kvm_arch_vcpu_postcreate - final setup once the vcpu is visible to the VM.
 *
 * Copies the VM epoch into the SIE block under kvm->lock, attaches the
 * VM-wide gmap for non-ucontrol guests, enables operation-exception
 * interception when facility 74 or user_instr0 is active, and primes
 * enabled_gmap for the first vcpu_load.
 */
2353 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2355 mutex_lock(&vcpu->kvm->lock);
2357 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2359 mutex_unlock(&vcpu->kvm->lock);
2360 if (!kvm_is_ucontrol(vcpu->kvm)) {
2361 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2364 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2365 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2366 /* make vcpu_load load the right gmap on the first trigger */
2367 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
/*
 * kvm_s390_vcpu_crypto_setup - mirror the VM crypto settings into a vcpu.
 *
 * Depends on facility 76. The AES/DEA bits in ecb3 are rebuilt from the
 * VM-level aes_kw/dea_kw switches and the CRYCB designator is copied into
 * the SIE block.
 */
2370 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2372 if (!test_kvm_facility(vcpu->kvm, 76))
/* start from a clean slate, then set only what is enabled */
2375 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2377 if (vcpu->kvm->arch.crypto.aes_kw)
2378 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2379 if (vcpu->kvm->arch.crypto.dea_kw)
2380 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2382 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
/*
 * kvm_s390_vcpu_unsetup_cmma - free the page referenced by the SIE block's
 * cbrlo field and clear the field.
 */
2385 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2387 free_page(vcpu->arch.sie_block->cbrlo);
2388 vcpu->arch.sie_block->cbrlo = 0;
/*
 * kvm_s390_vcpu_setup_cmma - allocate the zeroed page referenced by cbrlo
 * and clear ECB2_PFMFI in ecb2. Counterpart of
 * kvm_s390_vcpu_unsetup_cmma().
 */
2391 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2393 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2394 if (!vcpu->arch.sie_block->cbrlo)
2397 vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
/*
 * kvm_s390_vcpu_setup_model - apply the VM's CPU model to a vcpu: copy the
 * ibc value and, with facility 7, store the 32-bit address of the facility
 * list in the SIE block.
 */
2401 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2403 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2405 vcpu->arch.sie_block->ibc = model->ibc;
2406 if (test_kvm_facility(vcpu->kvm, 7))
2407 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
/*
 * kvm_arch_vcpu_setup - program the SIE control block of a new vcpu.
 *
 * Derives the cpuflags and the execution-control fields (ecb, ecb2, eca,
 * ecd) and the interception controls from the VM's facilities and from
 * host/SCLP capabilities. Then sets up CMMA if enabled, the clock
 * comparator timer and the crypto controls.
 */
2410 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2414 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
/* facility 78 selects GED2, otherwise facility 8 selects GED */
2418 if (test_kvm_facility(vcpu->kvm, 78))
2419 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2420 else if (test_kvm_facility(vcpu->kvm, 8))
2421 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2423 kvm_s390_vcpu_setup_model(vcpu);
2425 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2426 if (MACHINE_HAS_ESOP)
2427 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2428 if (test_kvm_facility(vcpu->kvm, 9))
2429 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2430 if (test_kvm_facility(vcpu->kvm, 73))
2431 vcpu->arch.sie_block->ecb |= ECB_TE;
2433 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2434 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2435 if (test_kvm_facility(vcpu->kvm, 130))
2436 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2437 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2439 vcpu->arch.sie_block->eca |= ECA_CEI;
2441 vcpu->arch.sie_block->eca |= ECA_IB;
2443 vcpu->arch.sie_block->eca |= ECA_SII;
2444 if (sclp.has_sigpif)
2445 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2446 if (test_kvm_facility(vcpu->kvm, 129)) {
2447 vcpu->arch.sie_block->eca |= ECA_VX;
2448 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2450 if (test_kvm_facility(vcpu->kvm, 139))
2451 vcpu->arch.sie_block->ecd |= ECD_MEF;
/* these SIE block fields point into the shared kvm_run sync-regs area */
2453 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2455 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2458 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2460 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2462 if (vcpu->kvm->arch.use_cmma) {
2463 rc = kvm_s390_vcpu_setup_cmma(vcpu);
/* clock comparator wakeups are delivered through an hrtimer */
2467 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2468 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2470 kvm_s390_vcpu_crypto_setup(vcpu);
/*
 * kvm_arch_vcpu_create - allocate and minimally initialise a vcpu.
 *
 * For non-ucontrol VMs the id must fit into the SCA. Allocates the vcpu
 * from kvm_vcpu_cache and one zeroed page holding the SIE block and ITDB,
 * wires up the local interrupt state, and calls kvm_vcpu_init(). On failure
 * the SIE page and the vcpu are freed again.
 */
2475 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2478 struct kvm_vcpu *vcpu;
2479 struct sie_page *sie_page;
2482 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2487 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
/* struct sie_page must occupy exactly the one page allocated below */
2491 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2492 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2496 vcpu->arch.sie_block = &sie_page->sie_block;
2497 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2499 /* the real guest size will always be smaller than msl */
2500 vcpu->arch.sie_block->mso = 0;
2501 vcpu->arch.sie_block->msl = sclp.hamax;
2503 vcpu->arch.sie_block->icpua = id;
2504 spin_lock_init(&vcpu->arch.local_int.lock);
2505 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2506 vcpu->arch.local_int.wq = &vcpu->wq;
2507 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2508 seqcount_init(&vcpu->arch.cputm_seqcount);
2510 rc = kvm_vcpu_init(vcpu, kvm, id);
2512 goto out_free_sie_block;
2513 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2514 vcpu->arch.sie_block);
2515 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
/* error unwinding */
2519 free_page((unsigned long)(vcpu->arch.sie_block));
2521 kmem_cache_free(kvm_vcpu_cache, vcpu);
/*
 * kvm_arch_vcpu_runnable - a vcpu counts as runnable when
 * kvm_s390_vcpu_has_irq() reports a pending interrupt for it.
 */
2526 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2528 return kvm_s390_vcpu_has_irq(vcpu, 0);
/*
 * kvm_arch_vcpu_in_kernel - true when the guest PSW does not have the
 * problem-state bit (PSW_MASK_PSTATE) set.
 */
2531 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2533 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
/*
 * kvm_s390_vcpu_block - set PROG_BLOCK_SIE in the SIE block's prog20 word.
 * Cleared again by kvm_s390_vcpu_unblock().
 */
2536 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2538 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
/*
 * kvm_s390_vcpu_unblock - clear PROG_BLOCK_SIE in prog20, undoing
 * kvm_s390_vcpu_block().
 */
2542 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2544 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
/*
 * kvm_s390_vcpu_request - set PROG_REQUEST in prog20 to flag a pending
 * request; cleared by kvm_s390_vcpu_request_handled().
 */
2547 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2549 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
/*
 * kvm_s390_vcpu_request_handled - clear PROG_REQUEST in prog20 once the
 * request has been processed.
 */
2553 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2555 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
/*
 * exit_sie - set CPUSTAT_STOP_INT for the vcpu, then loop for as long as
 * PROG_IN_SIE is still set in the SIE block's prog0c field.
 */
2559 * Kick a guest cpu out of SIE and wait until SIE is not running.
2560 * If the CPU is not running (e.g. waiting as idle) the function will
2561 * return immediately. */
2562 void exit_sie(struct kvm_vcpu *vcpu)
2564 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2565 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
/*
 * kvm_s390_sync_request - post request @req to @vcpu with kvm_make_request()
 * and raise PROG_REQUEST through kvm_s390_vcpu_request().
 */
2569 /* Kick a guest cpu out of SIE to process a request synchronously */
2570 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2572 kvm_make_request(req, vcpu);
2573 kvm_s390_vcpu_request(vcpu);
/*
 * kvm_gmap_notifier - gmap invalidation callback for a guest address range.
 *
 * Shadow gmaps and ranges starting at or above 2 GiB are not of interest.
 * For every vcpu whose two prefix pages intersect [start, end], an MMU
 * reload is requested synchronously.
 */
2576 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2579 struct kvm *kvm = gmap->private;
2580 struct kvm_vcpu *vcpu;
2581 unsigned long prefix;
2584 if (gmap_is_shadow(gmap))
2586 if (start >= 1UL << 31)
2587 /* We are only interested in prefix pages */
2589 kvm_for_each_vcpu(i, vcpu, kvm) {
2590 /* match against both prefix pages */
2591 prefix = kvm_s390_get_prefix(vcpu);
/* interval overlap test against the two-page prefix area */
2592 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2593 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2595 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
/*
 * kvm_arch_vcpu_should_kick - hook required by generic KVM code.
 * NOTE(review): per the remark below it is never called on s390; confirm
 * the body only guards against unexpected use.
 */
2600 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2602 /* kvm common code refers to this, but never calls it */
/*
 * kvm_arch_vcpu_ioctl_get_one_reg - read one s390 register for user space.
 * @vcpu: vcpu to read from
 * @reg:  register selector; reg->addr is the user buffer for the value
 *
 * Each supported register is written with put_user(), whose result becomes
 * r. TODPR is transferred as a u32, all other registers as u64.
 */
2607 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2608 struct kvm_one_reg *reg)
2613 case KVM_REG_S390_TODPR:
2614 r = put_user(vcpu->arch.sie_block->todpr,
2615 (u32 __user *)reg->addr);
2617 case KVM_REG_S390_EPOCHDIFF:
2618 r = put_user(vcpu->arch.sie_block->epoch,
2619 (u64 __user *)reg->addr);
/* the CPU timer goes through the accessor so accounting is honoured */
2621 case KVM_REG_S390_CPU_TIMER:
2622 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2623 (u64 __user *)reg->addr);
2625 case KVM_REG_S390_CLOCK_COMP:
2626 r = put_user(vcpu->arch.sie_block->ckc,
2627 (u64 __user *)reg->addr);
2629 case KVM_REG_S390_PFTOKEN:
2630 r = put_user(vcpu->arch.pfault_token,
2631 (u64 __user *)reg->addr);
2633 case KVM_REG_S390_PFCOMPARE:
2634 r = put_user(vcpu->arch.pfault_compare,
2635 (u64 __user *)reg->addr);
2637 case KVM_REG_S390_PFSELECT:
2638 r = put_user(vcpu->arch.pfault_select,
2639 (u64 __user *)reg->addr);
2641 case KVM_REG_S390_PP:
2642 r = put_user(vcpu->arch.sie_block->pp,
2643 (u64 __user *)reg->addr);
2645 case KVM_REG_S390_GBEA:
2646 r = put_user(vcpu->arch.sie_block->gbea,
2647 (u64 __user *)reg->addr);
/*
 * Load one s390 specific register of @vcpu from user space.
 *
 * @vcpu: virtual cpu to modify
 * @reg:  describes the register; reg->addr is the user space source
 *
 * Mirror image of kvm_arch_vcpu_ioctl_get_one_reg(): every visible case
 * fetches the value with get_user() and keeps the get_user() result in r.
 * Most cases read straight into the backing field. Two cases do more:
 *   KVM_REG_S390_CPU_TIMER - the value is read into the local val and then
 *                            handed to kvm_s390_set_cpu_timer()
 *   KVM_REG_S390_PFTOKEN   - when the new token equals
 *                            KVM_S390_PFAULT_TOKEN_INVALID the async page
 *                            fault completion queue of the vcpu is cleared
 *
 * NOTE(review): the switch header, local declarations, the default case, the
 * return statement and original line 2673 (presumably a check of r before
 * the cpu timer is set) are not part of this listing - confirm.
 */
2656 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2657 struct kvm_one_reg *reg)
2663 case KVM_REG_S390_TODPR:
2664 r = get_user(vcpu->arch.sie_block->todpr,
2665 (u32 __user *)reg->addr);
2667 case KVM_REG_S390_EPOCHDIFF:
2668 r = get_user(vcpu->arch.sie_block->epoch,
2669 (u64 __user *)reg->addr);
2671 case KVM_REG_S390_CPU_TIMER:
2672 r = get_user(val, (u64 __user *)reg->addr);
2674 kvm_s390_set_cpu_timer(vcpu, val);
2676 case KVM_REG_S390_CLOCK_COMP:
2677 r = get_user(vcpu->arch.sie_block->ckc,
2678 (u64 __user *)reg->addr);
2680 case KVM_REG_S390_PFTOKEN:
2681 r = get_user(vcpu->arch.pfault_token,
2682 (u64 __user *)reg->addr);
2683 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2684 kvm_clear_async_pf_completion_queue(vcpu);
2686 case KVM_REG_S390_PFCOMPARE:
2687 r = get_user(vcpu->arch.pfault_compare,
2688 (u64 __user *)reg->addr);
2690 case KVM_REG_S390_PFSELECT:
2691 r = get_user(vcpu->arch.pfault_select,
2692 (u64 __user *)reg->addr);
2694 case KVM_REG_S390_PP:
2695 r = get_user(vcpu->arch.sie_block->pp,
2696 (u64 __user *)reg->addr);
2698 case KVM_REG_S390_GBEA:
2699 r = get_user(vcpu->arch.sie_block->gbea,
2700 (u64 __user *)reg->addr);
/*
 * ioctl backend for the initial cpu reset: delegates the whole work to
 * kvm_s390_vcpu_initial_reset().
 *
 * NOTE(review): the return statement is not part of this listing.
 */
2709 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2711 kvm_s390_vcpu_initial_reset(vcpu);
2715 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2717 memcpy(&vcpu->run->s.regs.gprs, ®s->gprs, sizeof(regs->gprs));
2721 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2723 memcpy(®s->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
/*
 * ioctl backend that loads the guest access and control registers.
 *
 * @vcpu:  virtual cpu to modify
 * @sregs: values supplied by the caller
 *
 * sregs->acrs is copied into vcpu->run->s.regs.acrs and sregs->crs into the
 * gcr array of the SIE control block.
 *
 * NOTE(review): the return statement is not part of this listing.
 */
2727 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2728 struct kvm_sregs *sregs)
2730 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2731 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/*
 * ioctl backend that returns the guest access and control registers.
 *
 * @vcpu:  virtual cpu to read from
 * @sregs: filled in for the caller
 *
 * vcpu->run->s.regs.acrs is copied into sregs->acrs and the gcr array of the
 * SIE control block into sregs->crs.
 *
 * NOTE(review): the return statement is not part of this listing.
 */
2735 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2736 struct kvm_sregs *sregs)
2738 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2739 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/*
 * ioctl backend that loads the guest floating point state.
 *
 * @vcpu: virtual cpu to modify
 * @fpu:  floating point control word (fpc) and registers (fprs) to load
 *
 * fpu->fpc is checked with test_fp_ctl() and then stored in
 * vcpu->run->s.regs.fpc. The registers are loaded in one of two ways: either
 * converted with convert_fp_to_vx() into the vector register area
 * (run->s.regs.vrs) or copied with memcpy() into run->s.regs.fprs.
 *
 * NOTE(review): the action taken when test_fp_ctl() reports a problem, the
 * condition that selects between the two load paths and the return statement
 * are not part of this listing.
 */
2743 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2745 if (test_fp_ctl(fpu->fpc))
2747 vcpu->run->s.regs.fpc = fpu->fpc;
2749 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2750 (freg_t *) fpu->fprs);
2752 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
/*
 * ioctl backend that returns the guest floating point state.
 *
 * @vcpu: virtual cpu to read from
 * @fpu:  filled in with the floating point registers (fprs) and the control
 *        word (fpc)
 *
 * The registers are produced in one of two ways: either converted with
 * convert_vx_to_fp() from the vector register area (run->s.regs.vrs) or
 * copied with memcpy() from run->s.regs.fprs. fpu->fpc is taken from
 * run->s.regs.fpc.
 *
 * NOTE(review): the statement announced by the first comment in the body,
 * the condition that selects between the two paths and the return statement
 * are not part of this listing.
 */
2756 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2758 /* make sure we have the latest values */
2761 convert_vx_to_fp((freg_t *) fpu->fprs,
2762 (__vector128 *) vcpu->run->s.regs.vrs);
2764 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2765 fpu->fpc = vcpu->run->s.regs.fpc;
/*
 * ioctl backend that sets the initial PSW of a vcpu.
 *
 * @vcpu: virtual cpu to modify
 * @psw:  new PSW; its mask and addr members are stored in
 *        vcpu->run->psw_mask and vcpu->run->psw_addr
 *
 * The stopped state of the vcpu is tested with is_vcpu_stopped() first.
 *
 * NOTE(review): what happens for a vcpu that is not stopped, and the return
 * statement, are not part of this listing; presumably the request is rejected
 * for a running vcpu - confirm.
 */
2769 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2773 if (!is_vcpu_stopped(vcpu))
2776 vcpu->run->psw_mask = psw.mask;
2777 vcpu->run->psw_addr = psw.addr;
/*
 * Address translation ioctl backend. Not implemented on s390: neither
 * argument is used.
 *
 * Return: always -EINVAL.
 */
2782 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2783 struct kvm_translation *tr)
2785 return -EINVAL; /* not implemented yet */
/*
 * Guest debug control bits accepted by
 * kvm_arch_vcpu_ioctl_set_guest_debug(); dbg->control is tested against the
 * complement of this mask there.
 */
2788 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2789 KVM_GUESTDBG_USE_HW_BP | \
2790 KVM_GUESTDBG_ENABLE)
/*
 * ioctl backend that configures guest debugging for a vcpu.
 *
 * @vcpu: virtual cpu to configure
 * @dbg:  requested debug setup; dbg->control carries the KVM_GUESTDBG_* bits
 *
 * Visible steps:
 *  - the previous setup is dropped (guest_debug = 0, breakpoint data cleared)
 *  - dbg->control is tested for bits outside VALID_GUESTDBG_FLAGS
 *  - sclp.has_gpere is tested
 *  - with KVM_GUESTDBG_ENABLE set, dbg->control becomes vcpu->guest_debug and
 *    CPUSTAT_P is set in the cpuflags; with KVM_GUESTDBG_USE_HW_BP also set,
 *    the breakpoints are imported through kvm_s390_import_bp_data()
 *  - another path clears CPUSTAT_P and resets arch.guestdbg.last_bp
 *  - the last three statements reset guest_debug, the breakpoint data and
 *    CPUSTAT_P once more
 *
 * NOTE(review): the results of the two validity tests, the else and error
 * conditions guarding the later paths and the return statement are not part
 * of this listing; presumably the final reset is the cleanup after a failed
 * breakpoint import - confirm.
 */
2792 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2793 struct kvm_guest_debug *dbg)
2797 vcpu->guest_debug = 0;
2798 kvm_s390_clear_bp_data(vcpu);
2800 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2802 if (!sclp.has_gpere)
2805 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2806 vcpu->guest_debug = dbg->control;
2807 /* enforce guest PER */
2808 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2810 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2811 rc = kvm_s390_import_bp_data(vcpu, dbg);
2813 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2814 vcpu->arch.guestdbg.last_bp = 0;
2818 vcpu->guest_debug = 0;
2819 kvm_s390_clear_bp_data(vcpu);
2820 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
/*
 * ioctl backend that reports the multiprocessing state of a vcpu.
 *
 * @vcpu:     virtual cpu to query
 * @mp_state: not touched by the visible code; the state is delivered through
 *            the return value
 *
 * Return: KVM_MP_STATE_STOPPED when is_vcpu_stopped() is true, otherwise
 * KVM_MP_STATE_OPERATING.
 */
2826 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2827 struct kvm_mp_state *mp_state)
2829 /* CHECK_STOP and LOAD are not supported yet */
2830 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2831 KVM_MP_STATE_OPERATING;
/*
 * ioctl backend that sets the multiprocessing state of a vcpu.
 *
 * @vcpu:     virtual cpu to modify
 * @mp_state: requested state in mp_state->mp_state
 *
 * Any call sets kvm->arch.user_cpu_state_ctrl to 1 before the requested state
 * is looked at. KVM_MP_STATE_STOPPED leads to kvm_s390_vcpu_stop(),
 * KVM_MP_STATE_OPERATING to kvm_s390_vcpu_start(). KVM_MP_STATE_LOAD and
 * KVM_MP_STATE_CHECK_STOP fall through to the handling of unsupported
 * states.
 *
 * NOTE(review): the default case and the return statement are not part of
 * this listing.
 */
2834 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2835 struct kvm_mp_state *mp_state)
2839 /* user space knows about this interface - let it control the state */
2840 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2842 switch (mp_state->mp_state) {
2843 case KVM_MP_STATE_STOPPED:
2844 kvm_s390_vcpu_stop(vcpu);
2846 case KVM_MP_STATE_OPERATING:
2847 kvm_s390_vcpu_start(vcpu);
2849 case KVM_MP_STATE_LOAD:
2850 case KVM_MP_STATE_CHECK_STOP:
2851 /* fall through - CHECK_STOP and LOAD are not supported yet */
/*
 * Return true when the CPUSTAT_IBS bit is currently set in the cpuflags of
 * the SIE control block of @vcpu, false otherwise.
 */
2859 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2861 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
/*
 * Process the requests pending for @vcpu.
 *
 * PROG_REQUEST is cleared first through kvm_s390_vcpu_request_handled(),
 * then kvm_request_pending() is consulted. The visible handlers are:
 *   KVM_REQ_MMU_RELOAD      - protect the two prefix pages again with
 *                             gmap_mprotect_notify(); one visible path posts
 *                             the same request again
 *   KVM_REQ_TLB_FLUSH       - store 0xffff in sie_block->ihcpu
 *   KVM_REQ_ENABLE_IBS      - set CPUSTAT_IBS unless it is already set
 *   KVM_REQ_DISABLE_IBS     - clear CPUSTAT_IBS if it is set
 *   KVM_REQ_ICPT_OPEREXC    - set ICTL_OPEREXC in sie_block->ictl
 *   KVM_REQ_START_MIGRATION - clear ECB2_CMMA in sie_block->ecb2
 *   KVM_REQ_STOP_MIGRATION  - set ECB2_CMMA again when both
 *                             kvm->arch.use_cmma and the use_cmma flag of
 *                             the mm context are set
 *   KVM_REQ_UNHALT          - cleared without further action
 *
 * NOTE(review): the control flow that follows each handler (retry or
 * return), the condition guarding the repeated KVM_REQ_MMU_RELOAD and the
 * return statements are not part of this listing.
 */
2864 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2867 kvm_s390_vcpu_request_handled(vcpu);
2868 if (!kvm_request_pending(vcpu))
2871 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2872 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2873 * This ensures that the ipte instruction for this request has
2874 * already finished. We might race against a second unmapper that
2875 * wants to set the blocking bit. Lets just retry the request loop.
2877 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2879 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2880 kvm_s390_get_prefix(vcpu),
2881 PAGE_SIZE * 2, PROT_WRITE);
2883 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2889 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2890 vcpu->arch.sie_block->ihcpu = 0xffff;
2894 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2895 if (!ibs_enabled(vcpu)) {
2896 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2897 atomic_or(CPUSTAT_IBS,
2898 &vcpu->arch.sie_block->cpuflags);
2903 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2904 if (ibs_enabled(vcpu)) {
2905 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2906 atomic_andnot(CPUSTAT_IBS,
2907 &vcpu->arch.sie_block->cpuflags);
2912 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2913 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2917 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2919 * Disable CMMA virtualization; we will emulate the ESSA
2920 * instruction manually, in order to provide additional
2921 * functionalities needed for live migration.
2923 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2927 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2929 * Re-enable CMMA virtualization if CMMA is available and
2932 if ((vcpu->kvm->arch.use_cmma) &&
2933 (vcpu->kvm->mm->context.use_cmma))
2934 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2938 /* nothing to do, just clear the request */
2939 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
/*
 * Set the guest TOD clock including its epoch index.
 *
 * @kvm:  VM whose clock is set
 * @gtod: requested guest clock (tod and epoch_idx members are used)
 *
 * Under kvm->lock the current host clock is read with get_tod_clock_ext().
 * The VM wide offsets are then computed as
 *   kvm->arch.epoch = gtod->tod - htod.tod
 *   kvm->arch.epdx  = gtod->epoch_idx - htod.epoch_idx
 * When the computed epoch is larger than gtod->tod, epdx is decremented by
 * one (assuming unsigned arithmetic this is the borrow of a wrapped
 * subtraction - TODO confirm the field types). Finally all vcpus are blocked
 * with kvm_s390_vcpu_block_all(), epoch and epdx are copied into every SIE
 * control block, and the vcpus are unblocked again.
 *
 * NOTE(review): a few short statements of the body are not part of this
 * listing.
 */
2944 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2945 const struct kvm_s390_vm_tod_clock *gtod)
2947 struct kvm_vcpu *vcpu;
2948 struct kvm_s390_tod_clock_ext htod;
2951 mutex_lock(&kvm->lock);
2954 get_tod_clock_ext((char *)&htod);
2956 kvm->arch.epoch = gtod->tod - htod.tod;
2957 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2959 if (kvm->arch.epoch > gtod->tod)
2960 kvm->arch.epdx -= 1;
2962 kvm_s390_vcpu_block_all(kvm);
2963 kvm_for_each_vcpu(i, vcpu, kvm) {
2964 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2965 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
2968 kvm_s390_vcpu_unblock_all(kvm);
2970 mutex_unlock(&kvm->lock);
/*
 * Set the guest TOD clock (without epoch index).
 *
 * @kvm: VM whose clock is set
 * @tod: requested guest clock value
 *
 * Under kvm->lock the VM wide offset kvm->arch.epoch is set to
 * tod - get_tod_clock(). All vcpus are then blocked with
 * kvm_s390_vcpu_block_all(), the new epoch is copied into every SIE control
 * block, and the vcpus are unblocked again.
 *
 * NOTE(review): a few short statements of the body are not part of this
 * listing.
 */
2973 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2975 struct kvm_vcpu *vcpu;
2978 mutex_lock(&kvm->lock);
2980 kvm->arch.epoch = tod - get_tod_clock();
2981 kvm_s390_vcpu_block_all(kvm);
2982 kvm_for_each_vcpu(i, vcpu, kvm)
2983 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2984 kvm_s390_vcpu_unblock_all(kvm);
2986 mutex_unlock(&kvm->lock);
2990 * kvm_arch_fault_in_page - fault-in guest page if necessary
2991 * @vcpu: The corresponding virtual cpu
2992 * @gpa: Guest physical address
2993 * @writable: Whether the page should be writable or not
2995 * Make sure that a guest page has been faulted-in on the host.
2997 * Return: Zero on success, negative error code otherwise.
/*
 * Implementation: forwards to gmap_fault() on the gmap of the vcpu and
 * returns its result. FAULT_FLAG_WRITE is passed when @writable is non-zero,
 * otherwise no fault flags are passed.
 */
2999 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3001 return gmap_fault(vcpu->arch.gmap, gpa,
3002 writable ? FAULT_FLAG_WRITE : 0);
/*
 * Inject a pfault interrupt carrying @token.
 *
 * @vcpu:        virtual cpu the page fault belongs to
 * @start_token: selects between the two injections below
 * @token:       value handed to the guest with the interrupt
 *
 * Two injections are visible:
 *  - KVM_S390_INT_PFAULT_INIT, with @token in irq.u.ext.ext_params2, injected
 *    into the vcpu through kvm_s390_inject_vcpu()
 *  - KVM_S390_INT_PFAULT_DONE, with @token in inti.parm64, injected into the
 *    VM through kvm_s390_inject_vm()
 * A non-zero result of either injection triggers WARN_ON_ONCE().
 *
 * NOTE(review): the branch on @start_token is not part of this listing;
 * judging by the callers (true for page not present, false for page present)
 * the INIT variant presumably belongs to start_token == true - confirm.
 */
3005 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3006 unsigned long token)
3008 struct kvm_s390_interrupt inti;
3009 struct kvm_s390_irq irq;
3012 irq.u.ext.ext_params2 = token;
3013 irq.type = KVM_S390_INT_PFAULT_INIT;
3014 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3016 inti.type = KVM_S390_INT_PFAULT_DONE;
3017 inti.parm64 = token;
3018 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
/*
 * Async page fault hook for a page that is not present: emits the
 * pfault_init trace event and calls __kvm_inject_pfault_token() with
 * start_token == true and the token stored in work->arch.pfault_token.
 */
3022 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3023 struct kvm_async_pf *work)
3025 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3026 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
/*
 * Async page fault hook for a page that has become present: emits the
 * pfault_done trace event and calls __kvm_inject_pfault_token() with
 * start_token == false and the token stored in work->arch.pfault_token.
 */
3029 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3030 struct kvm_async_pf *work)
3032 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3033 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
/*
 * Async page fault hook for a page that is ready. No statement is visible in
 * the body; per its comment the page is always injected directly on s390.
 */
3036 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3037 struct kvm_async_pf *work)
3039 /* s390 will always inject the page directly */
/*
 * Arch hook telling the async page fault code whether a page present
 * notification can be injected for @vcpu.
 *
 * NOTE(review): the return statement is not part of this listing. The
 * remaining comment text suggests the answer is chosen so that the
 * completion check still runs and cleans up - confirm the returned value.
 */
3042 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3045 * s390 will always inject the page directly,
3046 * but we still want check_async_completion to cleanup
/*
 * Try to set up an async page fault for the address recorded in
 * current->thread.gmap_addr.
 *
 * The following conditions are tested before anything is set up:
 *  - arch.pfault_token equals KVM_S390_PFAULT_TOKEN_INVALID
 *  - the guest PSW mask, masked with arch.pfault_select, differs from
 *    arch.pfault_compare
 *  - psw_extint_disabled() is true
 *  - kvm_s390_vcpu_has_irq(vcpu, 0) is true
 *  - bit 0x200 of guest control register 0 is clear
 *  - pfault_enabled of the gmap is clear
 * Afterwards the host virtual address is derived from gmap_addr with
 * gfn_to_hva() plus the offset within the page, 8 bytes are read from guest
 * real address arch.pfault_token into arch.pfault_token of the local
 * struct kvm_arch_async_pf, and kvm_setup_async_pf() is called; its result
 * is kept in rc.
 *
 * NOTE(review): the statements guarded by the tests and the return statement
 * are not part of this listing; presumably each test makes the function give
 * up without setting up an async page fault - confirm.
 */
3051 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3054 struct kvm_arch_async_pf arch;
3057 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3059 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3060 vcpu->arch.pfault_compare)
3062 if (psw_extint_disabled(vcpu))
3064 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3066 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3068 if (!vcpu->arch.gmap->pfault_enabled)
3071 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3072 hva += current->thread.gmap_addr & ~PAGE_MASK;
3073 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3076 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
/*
 * Prepare @vcpu for entering SIE.
 *
 * Visible steps, in order:
 *  - housekeeping for completed async page faults
 *    (kvm_check_async_pf_completion())
 *  - guest registers 14 and 15 are copied from vcpu->run->s.regs.gprs into
 *    the gg14 and gg15 fields of the SIE control block
 *  - the CIF_MCCK_PENDING cpu flag is tested
 *  - unless the VM is a ucontrol VM, pending interrupts are delivered with
 *    kvm_s390_deliver_pending_interrupts()
 *  - pending requests are processed with kvm_s390_handle_requests()
 *  - with guest debugging enabled the guest PER registers are backed up and
 *    patched
 *  - the intercept code is reset to 0 and the current cpuflags are logged
 *    and traced
 *
 * NOTE(review): the handling of a pending machine check, the checks of rc
 * after the two calls that assign it and the return statement are not part
 * of this listing.
 */
3080 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3085 * On s390 notifications for arriving pages will be delivered directly
3086 * to the guest but the house keeping for completed pfaults is
3087 * handled outside the worker.
3089 kvm_check_async_pf_completion(vcpu);
3091 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3092 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3097 if (test_cpu_flag(CIF_MCCK_PENDING))
3100 if (!kvm_is_ucontrol(vcpu->kvm)) {
3101 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3106 rc = kvm_s390_handle_requests(vcpu);
3110 if (guestdbg_enabled(vcpu)) {
3111 kvm_s390_backup_guest_per_regs(vcpu);
3112 kvm_s390_patch_guest_per_regs(vcpu);
3115 vcpu->arch.sie_block->icptcode = 0;
3116 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3117 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3118 trace_kvm_s390_sie_enter(vcpu, cpuflags);
/*
 * Handle a fault that hit the SIE instruction itself by injecting a program
 * interrupt into the guest.
 *
 * pgm_info starts out with code PGM_ADDRESSING. The event is logged and
 * traced, then one byte of the instruction at the guest PSW address is read
 * with read_guest_instr() and its length is computed with insn_length(). On
 * one path pgm_info is replaced by vcpu->arch.pgm. Finally the instruction
 * length, combined with KVM_S390_PGM_FLAGS_ILC_VALID, is stored in
 * pgm_info.flags, the PSW is forwarded by that length with
 * kvm_s390_forward_psw() and the interrupt is injected.
 *
 * Return: result of kvm_s390_inject_prog_irq().
 *
 * NOTE(review): the conditions evaluated on rc after read_guest_instr() and
 * the instruction length chosen when the opcode cannot be read are not part
 * of this listing.
 */
3123 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3125 struct kvm_s390_pgm_info pgm_info = {
3126 .code = PGM_ADDRESSING,
3131 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3132 trace_kvm_s390_sie_fault(vcpu);
3135 * We want to inject an addressing exception, which is defined as a
3136 * suppressing or terminating exception. However, since we came here
3137 * by a DAT access exception, the PSW still points to the faulting
3138 * instruction since DAT exceptions are nullifying. So we've got
3139 * to look up the current opcode to get the length of the instruction
3140 * to be able to forward the PSW.
3142 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3143 ilen = insn_length(opcode);
3147 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3148 * Forward by arbitrary ilc, injection will take care of
3149 * nullification if necessary.
3151 pgm_info = vcpu->arch.pgm;
3154 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3155 kvm_s390_forward_psw(vcpu, ilen);
3156 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
/*
 * Evaluate the outcome of one SIE run.
 *
 * @vcpu:        virtual cpu that just left SIE
 * @exit_reason: value returned by the SIE entry code
 *
 * The intercept code is logged and traced, the guest PER registers are
 * restored when guest debugging is enabled, and gg14/gg15 of the SIE control
 * block are copied back into guest registers 14 and 15 of vcpu->run.
 * Afterwards the visible cases are:
 *  - exit_reason == -EINTR: a machine check is reinjected using the
 *    mcck_info stored in the sie_page that contains the SIE control block
 *  - icptcode > 0: kvm_handle_sie_intercept() is called; its result is
 *    compared with -EOPNOTSUPP, and on one path the exit is reported to
 *    user space as KVM_EXIT_S390_SIEIC together with icptcode, ipa and ipb
 *  - exit_reason != -EFAULT: only the exit_null statistic is incremented
 *  - ucontrol VM: the exit is reported as KVM_EXIT_S390_UCONTROL with
 *    current->thread.gmap_addr as translation exception code and program
 *    code 0x10
 *  - current->thread.gmap_pfault set: the flag is cleared, an async page
 *    fault is attempted with kvm_arch_setup_async_pf(), and the page is
 *    otherwise faulted in for writing with kvm_arch_fault_in_page()
 *  - anything else: vcpu_post_run_fault_in_sie()
 *
 * NOTE(review): the return statements of most branches are not part of this
 * listing.
 */
3159 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3161 struct mcck_volatile_info *mcck_info;
3162 struct sie_page *sie_page;
3164 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3165 vcpu->arch.sie_block->icptcode);
3166 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3168 if (guestdbg_enabled(vcpu))
3169 kvm_s390_restore_guest_per_regs(vcpu);
3171 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3172 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3174 if (exit_reason == -EINTR) {
3175 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3176 sie_page = container_of(vcpu->arch.sie_block,
3177 struct sie_page, sie_block);
3178 mcck_info = &sie_page->mcck_info;
3179 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3183 if (vcpu->arch.sie_block->icptcode > 0) {
3184 int rc = kvm_handle_sie_intercept(vcpu);
3186 if (rc != -EOPNOTSUPP)
3188 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3189 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3190 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3191 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3193 } else if (exit_reason != -EFAULT) {
3194 vcpu->stat.exit_null++;
3196 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3197 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3198 vcpu->run->s390_ucontrol.trans_exc_code =
3199 current->thread.gmap_addr;
3200 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3202 } else if (current->thread.gmap_pfault) {
3203 trace_kvm_s390_major_guest_pfault(vcpu);
3204 current->thread.gmap_pfault = 0;
3205 if (kvm_arch_setup_async_pf(vcpu))
3207 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3209 return vcpu_post_run_fault_in_sie(vcpu);
3212 static int __vcpu_run(struct kvm_vcpu *vcpu)
3214 int rc, exit_reason;
3217 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3218 * ning the guest), so that memslots (and other stuff) are protected
3220 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3223 rc = vcpu_pre_run(vcpu);
3227 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3229 * As PF_VCPU will be used in fault handler, between
3230 * guest_enter and guest_exit should be no uaccess.
3232 local_irq_disable();
3233 guest_enter_irqoff();
3234 __disable_cpu_timer_accounting(vcpu);
3236 exit_reason = sie64a(vcpu->arch.sie_block,
3237 vcpu->run->s.regs.gprs);
3238 local_irq_disable();
3239 __enable_cpu_timer_accounting(vcpu);
3240 guest_exit_irqoff();
3242 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3244 rc = vcpu_post_run(vcpu, exit_reason);
3245 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);