/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Heiko Carstens <heiko.carstens@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>
#include <asm/cpacf.h>
#include <asm/timex.h>

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
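/*
 * Each entry maps one debugfs file name to the offset of the corresponding
 * counter in struct kvm_vcpu; the generic KVM debugfs code reads the
 * counters through these offsets.
 */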
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
};

struct kvm_s390_tod_clock_ext {

/* allow nested virtualization in KVM (if enabled by user space) */
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
	/* every s390 is virtualization enabled ;-) */

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
	struct kvm_vcpu *vcpu;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				vcpu->arch.vsie_block->epoch -= *delta;

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);

void kvm_arch_hardware_unsetup(void)
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);

static void allow_cpu_feat(unsigned long nr)
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
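/*
 * Probe a single PERFORM LOCKED OPERATION function code; GR0 bit 0x100 is
 * the "test bit" flag, so the operation is not actually performed. Returns
 * nonzero if the tested function code is available on this machine.
 */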
static inline int plo_test_bit(unsigned char nr)
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;

	/* Parameter registers are ignored for "test bit" */
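/*
 * Probe the host for the PLO, PTFF and CPACF subfunctions as well as the
 * SIE features that can be passed through to guests, recording the results
 * in kvm_s390_available_subfunc and kvm_s390_available_cpu_feat.
 */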
static void kvm_s390_cpu_feat_init(void)
	for (i = 0; i < 256; ++i) {
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);

	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);

	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */

int kvm_arch_init(void *opaque)
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);

void kvm_arch_exit(void)
	debug_unregister(kvm_s390_dbf);

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_MEM_OP:
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
	case KVM_CAP_S390_VECTOR_REGISTERS:
	case KVM_CAP_S390_RI:
		r = test_facility(64);
	case KVM_CAP_S390_GS:
		r = test_facility(133);
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages; last_gfn is the first gfn past the slot */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;

	if (kvm_is_ucontrol(kvm))
	mutex_lock(&kvm->slots_lock);
	if (log->slot >= KVM_USER_MEM_SLOTS)
	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	if (!memslot->dirty_bitmap)
	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);

	/* Clear the dirty log */
	n = kvm_dirty_bitmap_bytes(memslot);
	memset(memslot->dirty_bitmap, 0, n);
	mutex_unlock(&kvm->slots_lock);
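/*
 * Request an operation-exception intercept on every vcpu; used when user
 * space takes over instruction handling via KVM_CAP_S390_USER_INSTR0.
 */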
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
	case KVM_CAP_S390_RI:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
	case KVM_CAP_S390_GS:
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
		mutex_unlock(&kvm->lock);
	case KVM_S390_VM_MEM_CLR_CMMA:
		if (!kvm->arch.use_cmma)

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
		if (get_user(new_limit, (u64 __user *)attr->addr))
		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)

		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			gmap_remove(kvm->arch.gmap);
			kvm->arch.gmap = new;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
	struct kvm_vcpu *vcpu;

	if (!test_kvm_facility(kvm, 76))

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		mutex_unlock(&kvm->lock);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);

	mutex_unlock(&kvm->lock);

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
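/*
 * Migration mode is toggled through the KVM_S390_VM_MIGRATION vm attribute.
 * While it is on, the CMMA state of all dirty pages is tracked in
 * pgste_bitmap so that user space can fetch it incrementally with
 * KVM_S390_GET_CMMA_BITS.
 */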
/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
	struct kvm_s390_migration_state *mgs;
	struct kvm_memory_slot *ms;
	/* should be the only one */
	struct kvm_memslots *slots;
	unsigned long ram_pages;

	/* migration mode already enabled */
	if (kvm->arch.migration_state)
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
	kvm->arch.migration_state = mgs;
	if (kvm->arch.use_cmma) {
		/*
		 * Get the last slot. They should be sorted by base_gfn, so the
		 * last slot is also the one at the end of the address space.
		 * We have verified above that at least one slot is present.
		 */
		ms = slots->memslots + slots->used_slots - 1;
		/* round up so we only use full longs */
		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
		/* allocate enough bytes to store all the bits */
		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
		if (!mgs->pgste_bitmap) {
			kvm->arch.migration_state = NULL;

		mgs->bitmap_size = ram_pages;
		atomic64_set(&mgs->dirty_pages, ram_pages);
		/* mark all the pages in active slots as dirty */
		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
			ms = slots->memslots + slotnr;
			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);

	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);

/*
 * Must be called with kvm->lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
	struct kvm_s390_migration_state *mgs;

	/* migration mode already disabled */
	if (!kvm->arch.migration_state)
	mgs = kvm->arch.migration_state;
	kvm->arch.migration_state = NULL;
	if (kvm->arch.use_cmma) {
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
		vfree(mgs->pgste_bitmap);

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
	int idx, res = -ENXIO;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		idx = srcu_read_lock(&kvm->srcu);
		res = kvm_s390_vm_start_migration(kvm);
		srcu_read_unlock(&kvm->srcu, idx);
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
	mutex_unlock(&kvm->lock);

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
	u64 mig = (kvm->arch.migration_state != NULL);

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
	if (test_kvm_facility(kvm, 139))
		kvm_s390_set_tod_clock_ext(kvm, &gtod);
	else if (gtod.epoch_idx == 0)
		kvm_s390_set_tod_clock(kvm, gtod.tod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
	if (copy_from_user(&gtod_high, (void __user *)attr->addr,

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);

static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
				       struct kvm_s390_vm_tod_clock *gtod)
	struct kvm_s390_tod_clock_ext htod;

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
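	/* carry into the epoch index if the TOD addition above wrapped around */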
	if (gtod->tod < htod.tod)
		gtod->epoch_idx += 1;

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	if (test_kvm_facility(kvm, 139))
		kvm_s390_get_tod_clock_ext(kvm, &gtod);
		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
	if (copy_to_user((void __user *)attr->addr, &gtod_high,

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!copy_from_user(proc, (void __user *)attr->addr,
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
				kvm->arch.model.ibc = proc->ibc;
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	mutex_unlock(&kvm->lock);

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_processor *proc;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_machine *mach;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
			 sizeof(struct kvm_s390_vm_cpu_subfunc)))

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
	case KVM_S390_VM_MIGRATION:

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
	int srcu_idx, i, r = 0;

	if (args->flags != 0)

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
			 sizeof(uint8_t) * args->count);

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
	int srcu_idx, i, r = 0;

	if (args->flags != 0)

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
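/*
 * Worked example: with 8-byte pointers the cut-off is 16 pages. Each value
 * sent is a single byte, so emitting up to 15 clean bytes is cheaper than
 * paying the two-long (base address + length) header of a fresh block.
 */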
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)

/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
	unsigned long bufsize, hva, pgstev, i, next, cur;
	int srcu_idx, peek, r = 0, rr;

	cur = args->start_gfn;
	i = next = pgstev = 0;

	if (unlikely(!kvm->arch.use_cmma))
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.use_cmma) {
		memset(args, 0, sizeof(*args));

		/* We are not peeking, and there are no dirty pages */
		if (!atomic64_read(&s->dirty_pages)) {
			memset(args, 0, sizeof(*args));
		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
		if (cur >= s->bitmap_size)	/* nothing found, loop back */
			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
			memset(args, 0, sizeof(*args));
		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);

	res = vmalloc(bufsize);
	args->start_gfn = cur;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < bufsize) {
		hva = gfn_to_hva(kvm, cur);
		if (kvm_is_error_hva(hva)) {
		/* decrement only if we actually flipped the bit to 0 */
		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
			atomic64_dec(&s->dirty_pages);
		r = get_pgste(kvm->mm, hva, &pgstev);
		/* save the value */
		res[i++] = (pgstev >> 24) & 0x43;
		/*
		 * if the next bit is too far away, stop.
		 * if we reached the previous "next", find the next one
		 */
			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
				next = find_next_bit(s->pgste_bitmap,
						     s->bitmap_size, cur + 1);
			/* reached the end of the bitmap or of the buffer, stop */
			if ((next >= s->bitmap_size) ||
			    (next >= args->start_gfn + bufsize))
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);
	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;

	rr = copy_to_user((void __user *)args->values, res, args->count);

/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.use_cmma flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
	unsigned long hva, mask, pgstev, i;
	int srcu_idx, r = 0;

	if (!kvm->arch.use_cmma)
	/* invalid/unsupported flags */
	if (args->flags != 0)
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
	if (args->count == 0)

	bits = vmalloc(sizeof(*bits) * args->count);

	r = copy_from_user(bits, (void __user *)args->values, args->count);

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {

		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.use_cmma) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.use_cmma = 1;
		up_write(&kvm->mm->mmap_sem);
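/*
 * A minimal sketch of the user-space side (hypothetical names vm_fd/log):
 * enable migration mode via KVM_SET_DEVICE_ATTR (group KVM_S390_VM_MIGRATION,
 * attr KVM_S390_VM_MIGRATION_START), then repeatedly call
 * ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) until log.remaining reaches 0,
 * and replay the returned values on the target via
 * ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &log).
 */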
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;

	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
		r = kvm_s390_inject_vm(kvm, &s390int);
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		if (copy_from_user(&cap, argp, sizeof(cap)))
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
	case KVM_SET_DEVICE_ATTR: {
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
		r = kvm_s390_vm_set_attr(kvm, &attr);
	case KVM_GET_DEVICE_ATTR: {
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
		r = kvm_s390_vm_get_attr(kvm, &attr);
	case KVM_HAS_DEVICE_ATTR: {
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
		r = kvm_s390_vm_has_attr(kvm, &attr);
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
		r = kvm_s390_get_skeys(kvm, &args);
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
		r = kvm_s390_set_skeys(kvm, &args);
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		if (copy_from_user(&args, argp, sizeof(args)))
		r = kvm_s390_get_cmma_bits(kvm, &args);
		r = copy_to_user(argp, &args, sizeof(args));
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		if (copy_from_user(&args, argp, sizeof(args)))
		r = kvm_s390_set_cmma_bits(kvm, &args);

static int kvm_s390_query_ap_config(u8 *config)
	u32 fcn_code = 0x04000000UL;

	memset(config, 0, 128);
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"

static int kvm_s390_apxa_installed(void)
	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

			pr_err("PQAP(QCI) failed with cc=%d", cc);
			return config[0] & 0x40;

static void kvm_s390_set_crycb_format(struct kvm *kvm)
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;

static u64 kvm_s390_get_initial_cpuid(void)
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);

static void kvm_s390_crypto_init(struct kvm *kvm)
	if (!test_kvm_facility(kvm, 76))

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));

static void sca_dispose(struct kvm *kvm)
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
	gfp_t alloc_flags = GFP_KERNEL;
	char debug_name[16];
	static unsigned long sca_offset;

#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))

	rc = s390_enable_sie();

	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	spin_lock(&kvm_lock);
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));

	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
			kvm->arch.model.fac_mask[i] = 0UL;

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	/* we are always in czam mode - even on pre z14 machines */
	set_kvm_facility(kvm->arch.model.fac_mask, 138);
	set_kvm_facility(kvm->arch.model.fac_list, 138);
	/* we emulate STHYI in kvm */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);
	if (MACHINE_HAS_TLB_GUEST) {
		set_kvm_facility(kvm->arch.model.fac_mask, 147);
		set_kvm_facility(kvm->arch.model.fac_list, 147);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	kvm->arch.float_int.simm = 0;
	kvm->arch.float_int.nimm = 0;
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	KVM_EVENT(3, "creation of vm failed: %d", rc);

bool kvm_arch_has_vcpu_debugfs(void)

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);

static void kvm_free_vcpus(struct kvm *kvm)
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);

void kvm_arch_destroy_vm(struct kvm *kvm)
	kvm_free_vcpus(kvm);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	if (kvm->arch.migration_state) {
		vfree(kvm->arch.migration_state->pgste_bitmap);
		kfree(kvm->arch.migration_state);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);

/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
	vcpu->arch.gmap->private = vcpu->kvm;

static void sca_del_vcpu(struct kvm_vcpu *vcpu)
	if (!kvm_s390_use_sca_entries())
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	read_unlock(&vcpu->kvm->arch.sca_lock);

static void sca_add_vcpu(struct kvm_vcpu *vcpu)
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	read_unlock(&vcpu->kvm->arch.sca_lock);

/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
	d->ipte_control = s->ipte_control;

	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
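/*
 * Replace the basic SCA with an extended SCA: all vcpus are blocked and the
 * SCA lock is held for writing while the SCA pointers in every SIE control
 * block are rewritten, so no vcpu can run with a stale SCA address.
 */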
static int sca_switch_to_extended(struct kvm *kvm)
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
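/*
 * Check whether a vcpu id still fits into the SCA; switches from the basic
 * to the extended SCA on demand when the basic slot count is exceeded.
 */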
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
	if (id < KVM_S390_BSCA_CPU_SLOTS)
	if (!sclp.has_esca || !sclp.has_64bscao)

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 133))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);
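/*
 * CPU timer accounting scheme: the vcpu thread is the only writer of
 * cputm_start/cputm_enabled (under cputm_seqcount with preemption disabled);
 * other threads read the timer via kvm_s390_get_cpu_timer() and retry while
 * a write is in flight.
 */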
2220 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2221 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2223 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2224 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2225 vcpu->arch.cputm_start = get_tod_clock_fast();
2226 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2229 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2230 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2232 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2233 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2234 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2235 vcpu->arch.cputm_start = 0;
2236 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2239 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2240 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2242 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2243 vcpu->arch.cputm_enabled = true;
2244 __start_cpu_timer_accounting(vcpu);
2247 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2248 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2250 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2251 __stop_cpu_timer_accounting(vcpu);
2252 vcpu->arch.cputm_enabled = false;
2255 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2257 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2258 __enable_cpu_timer_accounting(vcpu);
2262 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2264 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2265 __disable_cpu_timer_accounting(vcpu);
2269 /* set the cpu timer - may only be called from the VCPU thread itself */
2270 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2272 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2273 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2274 if (vcpu->arch.cputm_enabled)
2275 vcpu->arch.cputm_start = get_tod_clock_fast();
2276 vcpu->arch.sie_block->cputm = cputm;
2277 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2281 /* update and get the cpu timer - can also be called from other VCPU threads */
2282 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2287 if (unlikely(!vcpu->arch.cputm_enabled))
2288 return vcpu->arch.sie_block->cputm;
2290 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2292 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2294 * If the writer would ever execute a read in the critical
2295 * section, e.g. in irq context, we have a deadlock.
2297 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2298 value = vcpu->arch.sie_block->cputm;
2299 /* if cputm_start is 0, accounting is being started/stopped */
2300 if (likely(vcpu->arch.cputm_start))
2301 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2302 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2307 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2310 gmap_enable(vcpu->arch.enabled_gmap);
2311 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2312 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2313 __start_cpu_timer_accounting(vcpu);
2317 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2320 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2321 __stop_cpu_timer_accounting(vcpu);
2322 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2323 vcpu->arch.enabled_gmap = gmap_get_enabled();
2324 gmap_disable(vcpu->arch.enabled_gmap);
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
	return 0;
}

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}

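/*
 * First-time setup of the SIE control block: derive the CPU state flags,
 * execution controls (ecb/eca/ecd) and interception controls from the
 * machine facilities and the CPU model configured for this VM.
 */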
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= ECB_SRSI;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= ECB_TE;

	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= ECA_CEI;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= ECA_IB;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= ECA_SII;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= ECA_SIGPI;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= ECA_VX;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
	}
	if (test_kvm_facility(vcpu->kvm, 139))
		vcpu->arch.sie_block->ecd |= ECD_MEF;

	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
					| SDNXC;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;

	if (sclp.has_kss)
		atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
	else
		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}

/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}

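/*
 * Called by the gmap code whenever host mappings backing guest memory in
 * the range [start, end] are invalidated. We only care about the two
 * pages backing a VCPU's prefix area and trigger an MMU reload for every
 * VCPU whose prefix falls into the affected range.
 */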
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	int i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}

static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;
	return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;
	if (!sclp.has_gpere)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}

static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

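/*
 * Process all requests that were posted to this VCPU while it was
 * outside of SIE. Every handled request restarts the scan via "goto
 * retry", since handling one request may post another one.
 */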
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMMA virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMMA virtualization if CMMA is available and
		 * CMMA has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.use_cmma))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);

	return 0;
}

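/*
 * Set the guest TOD clock from an extended (multiple-epoch aware) value:
 * the per-VM epoch is the difference between the requested guest TOD and
 * the current host TOD, with a borrow propagated into the epoch index.
 * All VCPUs are blocked while their epoch fields are updated consistently.
 */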
void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
				const struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_vcpu *vcpu;
	struct kvm_s390_tod_clock_ext htod;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	kvm->arch.epoch = gtod->tod - htod.tod;
	kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;

	if (kvm->arch.epoch > gtod->tod)
		kvm->arch.epdx -= 1;

	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		vcpu->arch.sie_block->epdx = kvm->arch.epdx;
	}

	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}

void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}

/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}

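/*
 * Inject the pfault handshake interrupts for async page faults: an
 * "init" external interrupt on the VCPU when the fault is first seen,
 * and a "done" interrupt on the VM once the page has been made
 * available, both carrying the guest's pfault token.
 */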
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}

static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}

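/*
 * Prepare a VCPU for entering SIE: flush completed pfaults, stash gprs
 * 14/15 in the SIE control block, deliver pending interrupts and handle
 * outstanding requests; a nonzero return value aborts the run loop.
 */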
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}

static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}

static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	struct mcck_volatile_info *mcck_info;
	struct sie_page *sie_page;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (exit_reason == -EINTR) {
		VCPU_EVENT(vcpu, 3, "%s", "machine check");
		sie_page = container_of(vcpu->arch.sie_block,
					struct sie_page, sie_block);
		mcck_info = &sie_page->mcck_info;
		kvm_s390_reinject_machine_check(vcpu, mcck_info);
		return 0;
	}

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}

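/*
 * The main run loop: alternates between vcpu_pre_run, the actual SIE
 * entry via sie64a() and vcpu_post_run until a signal arrives, guest
 * debugging wants to exit or post-run processing returns an error.
 */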
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}

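/*
 * Transfer register state from the kvm_run area into the SIE control
 * block and the host thread before entering the guest; also performs
 * lazy enablement of RI and GS when userspace hands in valid state.
 */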
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}

	kvm_run->kvm_dirty_regs = 0;
}

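/*
 * Counterpart of sync_regs: copy guest state back into the kvm_run area
 * and restore the host's access, floating point and guarded storage
 * registers after the guest has run.
 */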
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (MACHINE_HAS_GS) {
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		preempt_disable();
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		preempt_enable();
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
	}
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (kvm_run->immediate_exit)
		return -EINTR;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

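/*
 * The IBS facility speeds up a guest that has exactly one running VCPU
 * (see kvm_s390_vcpu_start/stop below). The helpers here toggle IBS via
 * synchronous requests so the target VCPU picks up the change before
 * re-entering SIE.
 */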
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

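/*
 * Back end for the KVM_S390_MEM_OP vcpu ioctl: read or write guest
 * logical memory through a bounce buffer, optionally only checking
 * accessibility or injecting the resulting program exception.
 */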
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;
	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;
	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
			ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
		mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}

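/*
 * Derive, from the 2-bit hmfai field for facility double-word i, a mask
 * of the facility bits that may be passed through to guests: each
 * increment of the field strips another 16 facility bits from the mask.
 */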
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");