1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2017
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13
14 #include <linux/compiler.h>
15 #include <linux/err.h>
16 #include <linux/fs.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
31
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include "kvm-s390.h"
44 #include "gaccess.h"
45
46 #define KMSG_COMPONENT "kvm-s390"
47 #undef pr_fmt
48 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
49
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53
54 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57                            (KVM_MAX_VCPUS + LOCAL_IRQS))
58
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60
61 struct kvm_stats_debugfs_item debugfs_entries[] = {
62         { "userspace_handled", VCPU_STAT(exit_userspace) },
63         { "exit_null", VCPU_STAT(exit_null) },
64         { "exit_validity", VCPU_STAT(exit_validity) },
65         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
66         { "exit_external_request", VCPU_STAT(exit_external_request) },
67         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
68         { "exit_instruction", VCPU_STAT(exit_instruction) },
69         { "exit_pei", VCPU_STAT(exit_pei) },
70         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
71         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
72         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
73         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
74         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
75         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
76         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
77         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
78         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
79         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
80         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
81         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
82         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
83         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
84         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
85         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
86         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
87         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
88         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
89         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
90         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
91         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
92         { "instruction_spx", VCPU_STAT(instruction_spx) },
93         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
94         { "instruction_stap", VCPU_STAT(instruction_stap) },
95         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
96         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
97         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
98         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
99         { "instruction_essa", VCPU_STAT(instruction_essa) },
100         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
101         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
102         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
103         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
104         { "instruction_sie", VCPU_STAT(instruction_sie) },
105         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
106         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
107         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
108         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
109         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
110         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
111         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
112         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
113         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
114         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
115         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
116         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
117         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
118         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
119         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
120         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
121         { "diagnose_10", VCPU_STAT(diagnose_10) },
122         { "diagnose_44", VCPU_STAT(diagnose_44) },
123         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
124         { "diagnose_258", VCPU_STAT(diagnose_258) },
125         { "diagnose_308", VCPU_STAT(diagnose_308) },
126         { "diagnose_500", VCPU_STAT(diagnose_500) },
127         { NULL }
128 };
129
130 struct kvm_s390_tod_clock_ext {
131         __u8 epoch_idx;
132         __u64 tod;
133         __u8 reserved[7];
134 } __packed;
135
136 /* allow nested virtualization in KVM (if enabled by user space) */
137 static int nested;
138 module_param(nested, int, S_IRUGO);
139 MODULE_PARM_DESC(nested, "Nested virtualization support");
140
141 /* upper facilities limit for kvm */
142 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
143
144 unsigned long kvm_s390_fac_list_mask_size(void)
145 {
146         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
147         return ARRAY_SIZE(kvm_s390_fac_list_mask);
148 }
149
150 /* available cpu features supported by kvm */
151 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
152 /* available subfunctions indicated via query / "test bit" */
153 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
154
155 static struct gmap_notifier gmap_notifier;
156 static struct gmap_notifier vsie_gmap_notifier;
157 debug_info_t *kvm_s390_dbf;
158
159 /* Section: not file related */
160 int kvm_arch_hardware_enable(void)
161 {
162         /* every s390 is virtualization enabled ;-) */
163         return 0;
164 }
165
166 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
167                               unsigned long end);
168
169 /*
170  * This callback is executed during stop_machine(). All CPUs are therefore
171  * temporarily stopped. In order not to change guest behavior, we have to
172  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
173  * so a CPU won't be stopped while calculating with the epoch.
174  */
175 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
176                           void *v)
177 {
178         struct kvm *kvm;
179         struct kvm_vcpu *vcpu;
180         int i;
181         unsigned long long *delta = v;
182
183         list_for_each_entry(kvm, &vm_list, vm_list) {
184                 kvm->arch.epoch -= *delta;
185                 kvm_for_each_vcpu(i, vcpu, kvm) {
186                         vcpu->arch.sie_block->epoch -= *delta;
187                         if (vcpu->arch.cputm_enabled)
188                                 vcpu->arch.cputm_start += *delta;
189                         if (vcpu->arch.vsie_block)
190                                 vcpu->arch.vsie_block->epoch -= *delta;
191                 }
192         }
193         return NOTIFY_OK;
194 }
195
196 static struct notifier_block kvm_clock_notifier = {
197         .notifier_call = kvm_clock_sync,
198 };
199
200 int kvm_arch_hardware_setup(void)
201 {
202         gmap_notifier.notifier_call = kvm_gmap_notifier;
203         gmap_register_pte_notifier(&gmap_notifier);
204         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
205         gmap_register_pte_notifier(&vsie_gmap_notifier);
206         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
207                                        &kvm_clock_notifier);
208         return 0;
209 }
210
211 void kvm_arch_hardware_unsetup(void)
212 {
213         gmap_unregister_pte_notifier(&gmap_notifier);
214         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
215         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
216                                          &kvm_clock_notifier);
217 }
218
219 static void allow_cpu_feat(unsigned long nr)
220 {
221         set_bit_inv(nr, kvm_s390_available_cpu_feat);
222 }
223
224 static inline int plo_test_bit(unsigned char nr)
225 {
226         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
227         int cc;
228
229         asm volatile(
230                 /* Parameter registers are ignored for "test bit" */
231                 "       plo     0,0,0,0(0)\n"
232                 "       ipm     %0\n"
233                 "       srl     %0,28\n"
234                 : "=d" (cc)
235                 : "d" (r0)
236                 : "cc");
237         return cc == 0;
238 }
239
240 static void kvm_s390_cpu_feat_init(void)
241 {
242         int i;
243
244         for (i = 0; i < 256; ++i) {
245                 if (plo_test_bit(i))
246                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
247         }
248
249         if (test_facility(28)) /* TOD-clock steering */
250                 ptff(kvm_s390_available_subfunc.ptff,
251                      sizeof(kvm_s390_available_subfunc.ptff),
252                      PTFF_QAF);
253
254         if (test_facility(17)) { /* MSA */
255                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
256                               kvm_s390_available_subfunc.kmac);
257                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
258                               kvm_s390_available_subfunc.kmc);
259                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
260                               kvm_s390_available_subfunc.km);
261                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
262                               kvm_s390_available_subfunc.kimd);
263                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
264                               kvm_s390_available_subfunc.klmd);
265         }
266         if (test_facility(76)) /* MSA3 */
267                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
268                               kvm_s390_available_subfunc.pckmo);
269         if (test_facility(77)) { /* MSA4 */
270                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
271                               kvm_s390_available_subfunc.kmctr);
272                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
273                               kvm_s390_available_subfunc.kmf);
274                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
275                               kvm_s390_available_subfunc.kmo);
276                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
277                               kvm_s390_available_subfunc.pcc);
278         }
279         if (test_facility(57)) /* MSA5 */
280                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
281                               kvm_s390_available_subfunc.ppno);
282
283         if (test_facility(146)) /* MSA8 */
284                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
285                               kvm_s390_available_subfunc.kma);
286
287         if (MACHINE_HAS_ESOP)
288                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
289         /*
290          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
291          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
292          */
293         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
294             !test_facility(3) || !nested)
295                 return;
296         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
297         if (sclp.has_64bscao)
298                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
299         if (sclp.has_siif)
300                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
301         if (sclp.has_gpere)
302                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
303         if (sclp.has_gsls)
304                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
305         if (sclp.has_ib)
306                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
307         if (sclp.has_cei)
308                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
309         if (sclp.has_ibs)
310                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
311         if (sclp.has_kss)
312                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
313         /*
314          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
315          * all skey handling functions read/set the skey from the PGSTE
316          * instead of the real storage key.
317          *
318          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
319          * pages being detected as preserved although they are resident.
320          *
321          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
322          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
323          *
324          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
325          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
326          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
327          *
328          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
329          * cannot easily shadow the SCA because of the ipte lock.
330          */
331 }
332
333 int kvm_arch_init(void *opaque)
334 {
335         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
336         if (!kvm_s390_dbf)
337                 return -ENOMEM;
338
339         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
340                 debug_unregister(kvm_s390_dbf);
341                 return -ENOMEM;
342         }
343
344         kvm_s390_cpu_feat_init();
345
346         /* Register floating interrupt controller interface. */
347         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
348 }
349
350 void kvm_arch_exit(void)
351 {
352         debug_unregister(kvm_s390_dbf);
353 }
354
355 /* Section: device related */
356 long kvm_arch_dev_ioctl(struct file *filp,
357                         unsigned int ioctl, unsigned long arg)
358 {
359         if (ioctl == KVM_S390_ENABLE_SIE)
360                 return s390_enable_sie();
361         return -EINVAL;
362 }
363
364 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
365 {
366         int r;
367
368         switch (ext) {
369         case KVM_CAP_S390_PSW:
370         case KVM_CAP_S390_GMAP:
371         case KVM_CAP_SYNC_MMU:
372 #ifdef CONFIG_KVM_S390_UCONTROL
373         case KVM_CAP_S390_UCONTROL:
374 #endif
375         case KVM_CAP_ASYNC_PF:
376         case KVM_CAP_SYNC_REGS:
377         case KVM_CAP_ONE_REG:
378         case KVM_CAP_ENABLE_CAP:
379         case KVM_CAP_S390_CSS_SUPPORT:
380         case KVM_CAP_IOEVENTFD:
381         case KVM_CAP_DEVICE_CTRL:
382         case KVM_CAP_ENABLE_CAP_VM:
383         case KVM_CAP_S390_IRQCHIP:
384         case KVM_CAP_VM_ATTRIBUTES:
385         case KVM_CAP_MP_STATE:
386         case KVM_CAP_IMMEDIATE_EXIT:
387         case KVM_CAP_S390_INJECT_IRQ:
388         case KVM_CAP_S390_USER_SIGP:
389         case KVM_CAP_S390_USER_STSI:
390         case KVM_CAP_S390_SKEYS:
391         case KVM_CAP_S390_IRQ_STATE:
392         case KVM_CAP_S390_USER_INSTR0:
393         case KVM_CAP_S390_CMMA_MIGRATION:
394         case KVM_CAP_S390_AIS:
395         case KVM_CAP_S390_AIS_MIGRATION:
396                 r = 1;
397                 break;
398         case KVM_CAP_S390_MEM_OP:
399                 r = MEM_OP_MAX_SIZE;
400                 break;
401         case KVM_CAP_NR_VCPUS:
402         case KVM_CAP_MAX_VCPUS:
403                 r = KVM_S390_BSCA_CPU_SLOTS;
404                 if (!kvm_s390_use_sca_entries())
405                         r = KVM_MAX_VCPUS;
406                 else if (sclp.has_esca && sclp.has_64bscao)
407                         r = KVM_S390_ESCA_CPU_SLOTS;
408                 break;
409         case KVM_CAP_NR_MEMSLOTS:
410                 r = KVM_USER_MEM_SLOTS;
411                 break;
412         case KVM_CAP_S390_COW:
413                 r = MACHINE_HAS_ESOP;
414                 break;
415         case KVM_CAP_S390_VECTOR_REGISTERS:
416                 r = MACHINE_HAS_VX;
417                 break;
418         case KVM_CAP_S390_RI:
419                 r = test_facility(64);
420                 break;
421         case KVM_CAP_S390_GS:
422                 r = test_facility(133);
423                 break;
424         case KVM_CAP_S390_BPB:
425                 r = test_facility(82);
426                 break;
427         default:
428                 r = 0;
429         }
430         return r;
431 }
432
433 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
434                                         struct kvm_memory_slot *memslot)
435 {
436         gfn_t cur_gfn, last_gfn;
437         unsigned long address;
438         struct gmap *gmap = kvm->arch.gmap;
439
440         /* Loop over all guest pages */
441         last_gfn = memslot->base_gfn + memslot->npages;
442         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
443                 address = gfn_to_hva_memslot(memslot, cur_gfn);
444
445                 if (test_and_clear_guest_dirty(gmap->mm, address))
446                         mark_page_dirty(kvm, cur_gfn);
447                 if (fatal_signal_pending(current))
448                         return;
449                 cond_resched();
450         }
451 }
452
453 /* Section: vm related */
454 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
455
456 /*
457  * Get (and clear) the dirty memory log for a memory slot.
458  */
459 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
460                                struct kvm_dirty_log *log)
461 {
462         int r;
463         unsigned long n;
464         struct kvm_memslots *slots;
465         struct kvm_memory_slot *memslot;
466         int is_dirty = 0;
467
468         if (kvm_is_ucontrol(kvm))
469                 return -EINVAL;
470
471         mutex_lock(&kvm->slots_lock);
472
473         r = -EINVAL;
474         if (log->slot >= KVM_USER_MEM_SLOTS)
475                 goto out;
476
477         slots = kvm_memslots(kvm);
478         memslot = id_to_memslot(slots, log->slot);
479         r = -ENOENT;
480         if (!memslot->dirty_bitmap)
481                 goto out;
482
483         kvm_s390_sync_dirty_log(kvm, memslot);
484         r = kvm_get_dirty_log(kvm, log, &is_dirty);
485         if (r)
486                 goto out;
487
488         /* Clear the dirty log */
489         if (is_dirty) {
490                 n = kvm_dirty_bitmap_bytes(memslot);
491                 memset(memslot->dirty_bitmap, 0, n);
492         }
493         r = 0;
494 out:
495         mutex_unlock(&kvm->slots_lock);
496         return r;
497 }
498
499 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
500 {
501         unsigned int i;
502         struct kvm_vcpu *vcpu;
503
504         kvm_for_each_vcpu(i, vcpu, kvm) {
505                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
506         }
507 }
508
509 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
510 {
511         int r;
512
513         if (cap->flags)
514                 return -EINVAL;
515
516         switch (cap->cap) {
517         case KVM_CAP_S390_IRQCHIP:
518                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
519                 kvm->arch.use_irqchip = 1;
520                 r = 0;
521                 break;
522         case KVM_CAP_S390_USER_SIGP:
523                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
524                 kvm->arch.user_sigp = 1;
525                 r = 0;
526                 break;
527         case KVM_CAP_S390_VECTOR_REGISTERS:
528                 mutex_lock(&kvm->lock);
529                 if (kvm->created_vcpus) {
530                         r = -EBUSY;
531                 } else if (MACHINE_HAS_VX) {
532                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
533                         set_kvm_facility(kvm->arch.model.fac_list, 129);
534                         if (test_facility(134)) {
535                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
536                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
537                         }
538                         if (test_facility(135)) {
539                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
540                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
541                         }
542                         r = 0;
543                 } else
544                         r = -EINVAL;
545                 mutex_unlock(&kvm->lock);
546                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
547                          r ? "(not available)" : "(success)");
548                 break;
549         case KVM_CAP_S390_RI:
550                 r = -EINVAL;
551                 mutex_lock(&kvm->lock);
552                 if (kvm->created_vcpus) {
553                         r = -EBUSY;
554                 } else if (test_facility(64)) {
555                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
556                         set_kvm_facility(kvm->arch.model.fac_list, 64);
557                         r = 0;
558                 }
559                 mutex_unlock(&kvm->lock);
560                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
561                          r ? "(not available)" : "(success)");
562                 break;
563         case KVM_CAP_S390_AIS:
564                 mutex_lock(&kvm->lock);
565                 if (kvm->created_vcpus) {
566                         r = -EBUSY;
567                 } else {
568                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
569                         set_kvm_facility(kvm->arch.model.fac_list, 72);
570                         r = 0;
571                 }
572                 mutex_unlock(&kvm->lock);
573                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
574                          r ? "(not available)" : "(success)");
575                 break;
576         case KVM_CAP_S390_GS:
577                 r = -EINVAL;
578                 mutex_lock(&kvm->lock);
579                 if (atomic_read(&kvm->online_vcpus)) {
580                         r = -EBUSY;
581                 } else if (test_facility(133)) {
582                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
583                         set_kvm_facility(kvm->arch.model.fac_list, 133);
584                         r = 0;
585                 }
586                 mutex_unlock(&kvm->lock);
587                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
588                          r ? "(not available)" : "(success)");
589                 break;
590         case KVM_CAP_S390_USER_STSI:
591                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
592                 kvm->arch.user_stsi = 1;
593                 r = 0;
594                 break;
595         case KVM_CAP_S390_USER_INSTR0:
596                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
597                 kvm->arch.user_instr0 = 1;
598                 icpt_operexc_on_all_vcpus(kvm);
599                 r = 0;
600                 break;
601         default:
602                 r = -EINVAL;
603                 break;
604         }
605         return r;
606 }
607
608 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
609 {
610         int ret;
611
612         switch (attr->attr) {
613         case KVM_S390_VM_MEM_LIMIT_SIZE:
614                 ret = 0;
615                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
616                          kvm->arch.mem_limit);
617                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
618                         ret = -EFAULT;
619                 break;
620         default:
621                 ret = -ENXIO;
622                 break;
623         }
624         return ret;
625 }
626
627 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
628 {
629         int ret;
630         unsigned int idx;
631         switch (attr->attr) {
632         case KVM_S390_VM_MEM_ENABLE_CMMA:
633                 ret = -ENXIO;
634                 if (!sclp.has_cmma)
635                         break;
636
637                 ret = -EBUSY;
638                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
639                 mutex_lock(&kvm->lock);
640                 if (!kvm->created_vcpus) {
641                         kvm->arch.use_cmma = 1;
642                         ret = 0;
643                 }
644                 mutex_unlock(&kvm->lock);
645                 break;
646         case KVM_S390_VM_MEM_CLR_CMMA:
647                 ret = -ENXIO;
648                 if (!sclp.has_cmma)
649                         break;
650                 ret = -EINVAL;
651                 if (!kvm->arch.use_cmma)
652                         break;
653
654                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
655                 mutex_lock(&kvm->lock);
656                 idx = srcu_read_lock(&kvm->srcu);
657                 s390_reset_cmma(kvm->arch.gmap->mm);
658                 srcu_read_unlock(&kvm->srcu, idx);
659                 mutex_unlock(&kvm->lock);
660                 ret = 0;
661                 break;
662         case KVM_S390_VM_MEM_LIMIT_SIZE: {
663                 unsigned long new_limit;
664
665                 if (kvm_is_ucontrol(kvm))
666                         return -EINVAL;
667
668                 if (get_user(new_limit, (u64 __user *)attr->addr))
669                         return -EFAULT;
670
671                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
672                     new_limit > kvm->arch.mem_limit)
673                         return -E2BIG;
674
675                 if (!new_limit)
676                         return -EINVAL;
677
678                 /* gmap_create takes last usable address */
679                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
680                         new_limit -= 1;
681
682                 ret = -EBUSY;
683                 mutex_lock(&kvm->lock);
684                 if (!kvm->created_vcpus) {
685                         /* gmap_create will round the limit up */
686                         struct gmap *new = gmap_create(current->mm, new_limit);
687
688                         if (!new) {
689                                 ret = -ENOMEM;
690                         } else {
691                                 gmap_remove(kvm->arch.gmap);
692                                 new->private = kvm;
693                                 kvm->arch.gmap = new;
694                                 ret = 0;
695                         }
696                 }
697                 mutex_unlock(&kvm->lock);
698                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
699                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
700                          (void *) kvm->arch.gmap->asce);
701                 break;
702         }
703         default:
704                 ret = -ENXIO;
705                 break;
706         }
707         return ret;
708 }
709
710 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
711
712 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
713 {
714         struct kvm_vcpu *vcpu;
715         int i;
716
717         if (!test_kvm_facility(kvm, 76))
718                 return -EINVAL;
719
720         mutex_lock(&kvm->lock);
721         switch (attr->attr) {
722         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
723                 get_random_bytes(
724                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
725                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
726                 kvm->arch.crypto.aes_kw = 1;
727                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
728                 break;
729         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
730                 get_random_bytes(
731                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
732                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
733                 kvm->arch.crypto.dea_kw = 1;
734                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
735                 break;
736         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
737                 kvm->arch.crypto.aes_kw = 0;
738                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
739                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
740                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
741                 break;
742         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
743                 kvm->arch.crypto.dea_kw = 0;
744                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
745                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
746                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
747                 break;
748         default:
749                 mutex_unlock(&kvm->lock);
750                 return -ENXIO;
751         }
752
753         kvm_for_each_vcpu(i, vcpu, kvm) {
754                 kvm_s390_vcpu_crypto_setup(vcpu);
755                 exit_sie(vcpu);
756         }
757         mutex_unlock(&kvm->lock);
758         return 0;
759 }
760
761 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
762 {
763         int cx;
764         struct kvm_vcpu *vcpu;
765
766         kvm_for_each_vcpu(cx, vcpu, kvm)
767                 kvm_s390_sync_request(req, vcpu);
768 }
769
770 /*
771  * Must be called with kvm->srcu held to avoid races on memslots, and with
772  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
773  */
774 static int kvm_s390_vm_start_migration(struct kvm *kvm)
775 {
776         struct kvm_s390_migration_state *mgs;
777         struct kvm_memory_slot *ms;
778         /* should be the only one */
779         struct kvm_memslots *slots;
780         unsigned long ram_pages;
781         int slotnr;
782
783         /* migration mode already enabled */
784         if (kvm->arch.migration_state)
785                 return 0;
786
787         slots = kvm_memslots(kvm);
788         if (!slots || !slots->used_slots)
789                 return -EINVAL;
790
791         mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
792         if (!mgs)
793                 return -ENOMEM;
794         kvm->arch.migration_state = mgs;
795
796         if (kvm->arch.use_cmma) {
797                 /*
798                  * Get the first slot. They are reverse sorted by base_gfn, so
799                  * the first slot is also the one at the end of the address
800                  * space. We have verified above that at least one slot is
801                  * present.
802                  */
803                 ms = slots->memslots;
804                 /* round up so we only use full longs */
805                 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
806                 /* allocate enough bytes to store all the bits */
807                 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
808                 if (!mgs->pgste_bitmap) {
809                         kfree(mgs);
810                         kvm->arch.migration_state = NULL;
811                         return -ENOMEM;
812                 }
813
814                 mgs->bitmap_size = ram_pages;
815                 atomic64_set(&mgs->dirty_pages, ram_pages);
816                 /* mark all the pages in active slots as dirty */
817                 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
818                         ms = slots->memslots + slotnr;
819                         bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
820                 }
821
822                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
823         }
824         return 0;
825 }
826
827 /*
828  * Must be called with kvm->slots_lock to avoid races with ourselves and
829  * kvm_s390_vm_start_migration.
830  */
831 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
832 {
833         struct kvm_s390_migration_state *mgs;
834
835         /* migration mode already disabled */
836         if (!kvm->arch.migration_state)
837                 return 0;
838         mgs = kvm->arch.migration_state;
839         kvm->arch.migration_state = NULL;
840
841         if (kvm->arch.use_cmma) {
842                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
843                 /* We have to wait for the essa emulation to finish */
844                 synchronize_srcu(&kvm->srcu);
845                 vfree(mgs->pgste_bitmap);
846         }
847         kfree(mgs);
848         return 0;
849 }
850
851 static int kvm_s390_vm_set_migration(struct kvm *kvm,
852                                      struct kvm_device_attr *attr)
853 {
854         int res = -ENXIO;
855
856         mutex_lock(&kvm->slots_lock);
857         switch (attr->attr) {
858         case KVM_S390_VM_MIGRATION_START:
859                 res = kvm_s390_vm_start_migration(kvm);
860                 break;
861         case KVM_S390_VM_MIGRATION_STOP:
862                 res = kvm_s390_vm_stop_migration(kvm);
863                 break;
864         default:
865                 break;
866         }
867         mutex_unlock(&kvm->slots_lock);
868
869         return res;
870 }
871
872 static int kvm_s390_vm_get_migration(struct kvm *kvm,
873                                      struct kvm_device_attr *attr)
874 {
875         u64 mig = (kvm->arch.migration_state != NULL);
876
877         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
878                 return -ENXIO;
879
880         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
881                 return -EFAULT;
882         return 0;
883 }
884
885 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
886 {
887         struct kvm_s390_vm_tod_clock gtod;
888
889         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
890                 return -EFAULT;
891
892         if (test_kvm_facility(kvm, 139))
893                 kvm_s390_set_tod_clock_ext(kvm, &gtod);
894         else if (gtod.epoch_idx == 0)
895                 kvm_s390_set_tod_clock(kvm, gtod.tod);
896         else
897                 return -EINVAL;
898
899         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
900                 gtod.epoch_idx, gtod.tod);
901
902         return 0;
903 }
904
905 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
906 {
907         u8 gtod_high;
908
909         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
910                                            sizeof(gtod_high)))
911                 return -EFAULT;
912
913         if (gtod_high != 0)
914                 return -EINVAL;
915         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
916
917         return 0;
918 }
919
920 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
921 {
922         u64 gtod;
923
924         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
925                 return -EFAULT;
926
927         kvm_s390_set_tod_clock(kvm, gtod);
928         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
929         return 0;
930 }
931
932 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
933 {
934         int ret;
935
936         if (attr->flags)
937                 return -EINVAL;
938
939         switch (attr->attr) {
940         case KVM_S390_VM_TOD_EXT:
941                 ret = kvm_s390_set_tod_ext(kvm, attr);
942                 break;
943         case KVM_S390_VM_TOD_HIGH:
944                 ret = kvm_s390_set_tod_high(kvm, attr);
945                 break;
946         case KVM_S390_VM_TOD_LOW:
947                 ret = kvm_s390_set_tod_low(kvm, attr);
948                 break;
949         default:
950                 ret = -ENXIO;
951                 break;
952         }
953         return ret;
954 }
955
956 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
957                                         struct kvm_s390_vm_tod_clock *gtod)
958 {
959         struct kvm_s390_tod_clock_ext htod;
960
961         preempt_disable();
962
963         get_tod_clock_ext((char *)&htod);
964
965         gtod->tod = htod.tod + kvm->arch.epoch;
966         gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
967
968         if (gtod->tod < htod.tod)
969                 gtod->epoch_idx += 1;
970
971         preempt_enable();
972 }
973
974 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
975 {
976         struct kvm_s390_vm_tod_clock gtod;
977
978         memset(&gtod, 0, sizeof(gtod));
979
980         if (test_kvm_facility(kvm, 139))
981                 kvm_s390_get_tod_clock_ext(kvm, &gtod);
982         else
983                 gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
984
985         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
986                 return -EFAULT;
987
988         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
989                 gtod.epoch_idx, gtod.tod);
990         return 0;
991 }
992
993 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
994 {
995         u8 gtod_high = 0;
996
997         if (copy_to_user((void __user *)attr->addr, &gtod_high,
998                                          sizeof(gtod_high)))
999                 return -EFAULT;
1000         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1001
1002         return 0;
1003 }
1004
1005 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1006 {
1007         u64 gtod;
1008
1009         gtod = kvm_s390_get_tod_clock_fast(kvm);
1010         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1011                 return -EFAULT;
1012         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1013
1014         return 0;
1015 }
1016
1017 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1018 {
1019         int ret;
1020
1021         if (attr->flags)
1022                 return -EINVAL;
1023
1024         switch (attr->attr) {
1025         case KVM_S390_VM_TOD_EXT:
1026                 ret = kvm_s390_get_tod_ext(kvm, attr);
1027                 break;
1028         case KVM_S390_VM_TOD_HIGH:
1029                 ret = kvm_s390_get_tod_high(kvm, attr);
1030                 break;
1031         case KVM_S390_VM_TOD_LOW:
1032                 ret = kvm_s390_get_tod_low(kvm, attr);
1033                 break;
1034         default:
1035                 ret = -ENXIO;
1036                 break;
1037         }
1038         return ret;
1039 }
1040
1041 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1042 {
1043         struct kvm_s390_vm_cpu_processor *proc;
1044         u16 lowest_ibc, unblocked_ibc;
1045         int ret = 0;
1046
1047         mutex_lock(&kvm->lock);
1048         if (kvm->created_vcpus) {
1049                 ret = -EBUSY;
1050                 goto out;
1051         }
1052         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1053         if (!proc) {
1054                 ret = -ENOMEM;
1055                 goto out;
1056         }
1057         if (!copy_from_user(proc, (void __user *)attr->addr,
1058                             sizeof(*proc))) {
1059                 kvm->arch.model.cpuid = proc->cpuid;
1060                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1061                 unblocked_ibc = sclp.ibc & 0xfff;
1062                 if (lowest_ibc && proc->ibc) {
1063                         if (proc->ibc > unblocked_ibc)
1064                                 kvm->arch.model.ibc = unblocked_ibc;
1065                         else if (proc->ibc < lowest_ibc)
1066                                 kvm->arch.model.ibc = lowest_ibc;
1067                         else
1068                                 kvm->arch.model.ibc = proc->ibc;
1069                 }
1070                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1071                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1072                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1073                          kvm->arch.model.ibc,
1074                          kvm->arch.model.cpuid);
1075                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1076                          kvm->arch.model.fac_list[0],
1077                          kvm->arch.model.fac_list[1],
1078                          kvm->arch.model.fac_list[2]);
1079         } else
1080                 ret = -EFAULT;
1081         kfree(proc);
1082 out:
1083         mutex_unlock(&kvm->lock);
1084         return ret;
1085 }
1086
1087 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1088                                        struct kvm_device_attr *attr)
1089 {
1090         struct kvm_s390_vm_cpu_feat data;
1091         int ret = -EBUSY;
1092
1093         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1094                 return -EFAULT;
1095         if (!bitmap_subset((unsigned long *) data.feat,
1096                            kvm_s390_available_cpu_feat,
1097                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1098                 return -EINVAL;
1099
1100         mutex_lock(&kvm->lock);
1101         if (!atomic_read(&kvm->online_vcpus)) {
1102                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1103                             KVM_S390_VM_CPU_FEAT_NR_BITS);
1104                 ret = 0;
1105         }
1106         mutex_unlock(&kvm->lock);
1107         return ret;
1108 }
1109
1110 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1111                                           struct kvm_device_attr *attr)
1112 {
1113         /*
1114          * Once supported by kernel + hw, we have to store the subfunctions
1115          * in kvm->arch and remember that user space configured them.
1116          */
1117         return -ENXIO;
1118 }
1119
1120 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1121 {
1122         int ret = -ENXIO;
1123
1124         switch (attr->attr) {
1125         case KVM_S390_VM_CPU_PROCESSOR:
1126                 ret = kvm_s390_set_processor(kvm, attr);
1127                 break;
1128         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1129                 ret = kvm_s390_set_processor_feat(kvm, attr);
1130                 break;
1131         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1132                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1133                 break;
1134         }
1135         return ret;
1136 }
1137
1138 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1139 {
1140         struct kvm_s390_vm_cpu_processor *proc;
1141         int ret = 0;
1142
1143         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1144         if (!proc) {
1145                 ret = -ENOMEM;
1146                 goto out;
1147         }
1148         proc->cpuid = kvm->arch.model.cpuid;
1149         proc->ibc = kvm->arch.model.ibc;
1150         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1151                S390_ARCH_FAC_LIST_SIZE_BYTE);
1152         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1153                  kvm->arch.model.ibc,
1154                  kvm->arch.model.cpuid);
1155         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1156                  kvm->arch.model.fac_list[0],
1157                  kvm->arch.model.fac_list[1],
1158                  kvm->arch.model.fac_list[2]);
1159         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1160                 ret = -EFAULT;
1161         kfree(proc);
1162 out:
1163         return ret;
1164 }
1165
1166 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1167 {
1168         struct kvm_s390_vm_cpu_machine *mach;
1169         int ret = 0;
1170
1171         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1172         if (!mach) {
1173                 ret = -ENOMEM;
1174                 goto out;
1175         }
1176         get_cpu_id((struct cpuid *) &mach->cpuid);
1177         mach->ibc = sclp.ibc;
1178         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1179                S390_ARCH_FAC_LIST_SIZE_BYTE);
1180         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1181                sizeof(S390_lowcore.stfle_fac_list));
1182         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1183                  kvm->arch.model.ibc,
1184                  kvm->arch.model.cpuid);
1185         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1186                  mach->fac_mask[0],
1187                  mach->fac_mask[1],
1188                  mach->fac_mask[2]);
1189         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1190                  mach->fac_list[0],
1191                  mach->fac_list[1],
1192                  mach->fac_list[2]);
1193         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1194                 ret = -EFAULT;
1195         kfree(mach);
1196 out:
1197         return ret;
1198 }
1199
1200 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1201                                        struct kvm_device_attr *attr)
1202 {
1203         struct kvm_s390_vm_cpu_feat data;
1204
1205         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1206                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1207         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1208                 return -EFAULT;
1209         return 0;
1210 }
1211
1212 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1213                                      struct kvm_device_attr *attr)
1214 {
1215         struct kvm_s390_vm_cpu_feat data;
1216
1217         bitmap_copy((unsigned long *) data.feat,
1218                     kvm_s390_available_cpu_feat,
1219                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1220         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1221                 return -EFAULT;
1222         return 0;
1223 }
1224
1225 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1226                                           struct kvm_device_attr *attr)
1227 {
1228         /*
1229          * Once we can actually configure subfunctions (kernel + hw support),
1230          * we have to check if they were already set by user space, if so copy
1231          * them from kvm->arch.
1232          */
1233         return -ENXIO;
1234 }
1235
1236 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1237                                         struct kvm_device_attr *attr)
1238 {
1239         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1240             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1241                 return -EFAULT;
1242         return 0;
1243 }
1244 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1245 {
1246         int ret = -ENXIO;
1247
1248         switch (attr->attr) {
1249         case KVM_S390_VM_CPU_PROCESSOR:
1250                 ret = kvm_s390_get_processor(kvm, attr);
1251                 break;
1252         case KVM_S390_VM_CPU_MACHINE:
1253                 ret = kvm_s390_get_machine(kvm, attr);
1254                 break;
1255         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1256                 ret = kvm_s390_get_processor_feat(kvm, attr);
1257                 break;
1258         case KVM_S390_VM_CPU_MACHINE_FEAT:
1259                 ret = kvm_s390_get_machine_feat(kvm, attr);
1260                 break;
1261         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1262                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1263                 break;
1264         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1265                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1266                 break;
1267         }
1268         return ret;
1269 }
1270
1271 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1272 {
1273         int ret;
1274
1275         switch (attr->group) {
1276         case KVM_S390_VM_MEM_CTRL:
1277                 ret = kvm_s390_set_mem_control(kvm, attr);
1278                 break;
1279         case KVM_S390_VM_TOD:
1280                 ret = kvm_s390_set_tod(kvm, attr);
1281                 break;
1282         case KVM_S390_VM_CPU_MODEL:
1283                 ret = kvm_s390_set_cpu_model(kvm, attr);
1284                 break;
1285         case KVM_S390_VM_CRYPTO:
1286                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1287                 break;
1288         case KVM_S390_VM_MIGRATION:
1289                 ret = kvm_s390_vm_set_migration(kvm, attr);
1290                 break;
1291         default:
1292                 ret = -ENXIO;
1293                 break;
1294         }
1295
1296         return ret;
1297 }
1298
1299 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1300 {
1301         int ret;
1302
1303         switch (attr->group) {
1304         case KVM_S390_VM_MEM_CTRL:
1305                 ret = kvm_s390_get_mem_control(kvm, attr);
1306                 break;
1307         case KVM_S390_VM_TOD:
1308                 ret = kvm_s390_get_tod(kvm, attr);
1309                 break;
1310         case KVM_S390_VM_CPU_MODEL:
1311                 ret = kvm_s390_get_cpu_model(kvm, attr);
1312                 break;
1313         case KVM_S390_VM_MIGRATION:
1314                 ret = kvm_s390_vm_get_migration(kvm, attr);
1315                 break;
1316         default:
1317                 ret = -ENXIO;
1318                 break;
1319         }
1320
1321         return ret;
1322 }
1323
1324 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1325 {
1326         int ret;
1327
1328         switch (attr->group) {
1329         case KVM_S390_VM_MEM_CTRL:
1330                 switch (attr->attr) {
1331                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1332                 case KVM_S390_VM_MEM_CLR_CMMA:
1333                         ret = sclp.has_cmma ? 0 : -ENXIO;
1334                         break;
1335                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1336                         ret = 0;
1337                         break;
1338                 default:
1339                         ret = -ENXIO;
1340                         break;
1341                 }
1342                 break;
1343         case KVM_S390_VM_TOD:
1344                 switch (attr->attr) {
1345                 case KVM_S390_VM_TOD_LOW:
1346                 case KVM_S390_VM_TOD_HIGH:
1347                         ret = 0;
1348                         break;
1349                 default:
1350                         ret = -ENXIO;
1351                         break;
1352                 }
1353                 break;
1354         case KVM_S390_VM_CPU_MODEL:
1355                 switch (attr->attr) {
1356                 case KVM_S390_VM_CPU_PROCESSOR:
1357                 case KVM_S390_VM_CPU_MACHINE:
1358                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1359                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1360                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1361                         ret = 0;
1362                         break;
1363                 /* configuring subfunctions is not supported yet */
1364                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1365                 default:
1366                         ret = -ENXIO;
1367                         break;
1368                 }
1369                 break;
1370         case KVM_S390_VM_CRYPTO:
1371                 switch (attr->attr) {
1372                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1373                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1374                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1375                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1376                         ret = 0;
1377                         break;
1378                 default:
1379                         ret = -ENXIO;
1380                         break;
1381                 }
1382                 break;
1383         case KVM_S390_VM_MIGRATION:
1384                 ret = 0;
1385                 break;
1386         default:
1387                 ret = -ENXIO;
1388                 break;
1389         }
1390
1391         return ret;
1392 }
1393
1394 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1395 {
1396         uint8_t *keys;
1397         uint64_t hva;
1398         int srcu_idx, i, r = 0;
1399
1400         if (args->flags != 0)
1401                 return -EINVAL;
1402
1403         /* Is this guest using storage keys? */
1404         if (!mm_use_skey(current->mm))
1405                 return KVM_S390_GET_SKEYS_NONE;
1406
1407         /* Enforce sane limit on memory allocation */
1408         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1409                 return -EINVAL;
1410
1411         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1412         if (!keys)
1413                 return -ENOMEM;
1414
1415         down_read(&current->mm->mmap_sem);
1416         srcu_idx = srcu_read_lock(&kvm->srcu);
1417         for (i = 0; i < args->count; i++) {
1418                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1419                 if (kvm_is_error_hva(hva)) {
1420                         r = -EFAULT;
1421                         break;
1422                 }
1423
1424                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1425                 if (r)
1426                         break;
1427         }
1428         srcu_read_unlock(&kvm->srcu, srcu_idx);
1429         up_read(&current->mm->mmap_sem);
1430
1431         if (!r) {
1432                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1433                                  sizeof(uint8_t) * args->count);
1434                 if (r)
1435                         r = -EFAULT;
1436         }
1437
1438         kvfree(keys);
1439         return r;
1440 }
1441
1442 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1443 {
1444         uint8_t *keys;
1445         uint64_t hva;
1446         int srcu_idx, i, r = 0;
1447
1448         if (args->flags != 0)
1449                 return -EINVAL;
1450
1451         /* Enforce sane limit on memory allocation */
1452         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1453                 return -EINVAL;
1454
1455         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1456         if (!keys)
1457                 return -ENOMEM;
1458
1459         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1460                            sizeof(uint8_t) * args->count);
1461         if (r) {
1462                 r = -EFAULT;
1463                 goto out;
1464         }
1465
1466         /* Enable storage key handling for the guest */
1467         r = s390_enable_skey();
1468         if (r)
1469                 goto out;
1470
1471         down_read(&current->mm->mmap_sem);
1472         srcu_idx = srcu_read_lock(&kvm->srcu);
1473         for (i = 0; i < args->count; i++) {
1474                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1475                 if (kvm_is_error_hva(hva)) {
1476                         r = -EFAULT;
1477                         break;
1478                 }
1479
1480                 /* Lowest order bit is reserved */
1481                 if (keys[i] & 0x01) {
1482                         r = -EINVAL;
1483                         break;
1484                 }
1485
1486                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1487                 if (r)
1488                         break;
1489         }
1490         srcu_read_unlock(&kvm->srcu, srcu_idx);
1491         up_read(&current->mm->mmap_sem);
1492 out:
1493         kvfree(keys);
1494         return r;
1495 }
1496
1497 /*
1498  * Base address and length must be sent at the start of each block, therefore
1499  * it's cheaper to send some clean data, as long as it's less than the size of
1500  * two longs.
1501  */
1502 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1503 /* use the same limit as for the storage keys, for consistency */
1504 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1505
1506 /*
1507  * This function searches for the next page with dirty CMMA attributes, and
1508  * saves the attributes in the buffer up to either the end of the buffer or
1509  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1510  * no trailing clean bytes are saved.
1511  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1512  * output buffer will indicate 0 as length.
1513  */
1514 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1515                                   struct kvm_s390_cmma_log *args)
1516 {
1517         struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1518         unsigned long bufsize, hva, pgstev, i, next, cur;
1519         int srcu_idx, peek, r = 0, rr;
1520         u8 *res;
1521
1522         cur = args->start_gfn;
1523         i = next = pgstev = 0;
1524
1525         if (unlikely(!kvm->arch.use_cmma))
1526                 return -ENXIO;
1527         /* Invalid/unsupported flags were specified */
1528         if (args->flags & ~KVM_S390_CMMA_PEEK)
1529                 return -EINVAL;
1530         /* A query (not a peek) was requested, but migration mode is not active */
1531         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1532         if (!peek && !s)
1533                 return -EINVAL;
1534         /* CMMA is disabled or was not used, or the buffer has length zero */
1535         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1536         if (!bufsize || !kvm->mm->context.use_cmma) {
1537                 memset(args, 0, sizeof(*args));
1538                 return 0;
1539         }
1540
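             /* if not peeking, start at the first dirty page in the migration bitmap */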
1541         if (!peek) {
1542                 /* We are not peeking, and there are no dirty pages */
1543                 if (!atomic64_read(&s->dirty_pages)) {
1544                         memset(args, 0, sizeof(*args));
1545                         return 0;
1546                 }
1547                 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1548                                     args->start_gfn);
1549                 if (cur >= s->bitmap_size)      /* nothing found, loop back */
1550                         cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1551                 if (cur >= s->bitmap_size) {    /* again! (very unlikely) */
1552                         memset(args, 0, sizeof(*args));
1553                         return 0;
1554                 }
1555                 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1556         }
1557
1558         res = vmalloc(bufsize);
1559         if (!res)
1560                 return -ENOMEM;
1561
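             /* tell userspace at which gfn the returned values start */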
1562         args->start_gfn = cur;
1563
1564         down_read(&kvm->mm->mmap_sem);
1565         srcu_idx = srcu_read_lock(&kvm->srcu);
1566         while (i < bufsize) {
1567                 hva = gfn_to_hva(kvm, cur);
1568                 if (kvm_is_error_hva(hva)) {
1569                         r = -EFAULT;
1570                         break;
1571                 }
1572                 /* decrement only if we actually flipped the bit to 0 */
1573                 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1574                         atomic64_dec(&s->dirty_pages);
1575                 r = get_pgste(kvm->mm, hva, &pgstev);
1576                 if (r < 0)
1577                         pgstev = 0;
1578                 /* save the usage state and the NODAT bit of the pgste */
1579                 res[i++] = (pgstev >> 24) & 0x43;
1580                 /*
1581                  * if the next bit is too far away, stop.
1582                  * if we reached the previous "next", find the next one
1583                  */
1584                 if (!peek) {
1585                         if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1586                                 break;
1587                         if (cur == next)
1588                                 next = find_next_bit(s->pgste_bitmap,
1589                                                      s->bitmap_size, cur + 1);
1590                         /* reached the end of the bitmap or of the buffer, stop */
1591                         if ((next >= s->bitmap_size) ||
1592                             (next >= args->start_gfn + bufsize))
1593                                 break;
1594                 }
1595                 cur++;
1596         }
1597         srcu_read_unlock(&kvm->srcu, srcu_idx);
1598         up_read(&kvm->mm->mmap_sem);
1599         args->count = i;
1600         args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1601
1602         rr = copy_to_user((void __user *)args->values, res, args->count);
1603         if (rr)
1604                 r = -EFAULT;
1605
1606         vfree(res);
1607         return r;
1608 }
1609
1610 /*
1611  * This function sets the CMMA attributes for the given pages. If the input
1612  * buffer has zero length, no action is taken, otherwise the attributes are
1613  * set and the mm->context.use_cmma flag is set.
1614  */
1615 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1616                                   const struct kvm_s390_cmma_log *args)
1617 {
1618         unsigned long hva, mask, pgstev, i;
1619         uint8_t *bits;
1620         int srcu_idx, r = 0;
1621
1622         mask = args->mask;
1623
1624         if (!kvm->arch.use_cmma)
1625                 return -ENXIO;
1626         /* invalid/unsupported flags */
1627         if (args->flags != 0)
1628                 return -EINVAL;
1629         /* Enforce sane limit on memory allocation */
1630         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1631                 return -EINVAL;
1632         /* Nothing to do */
1633         if (args->count == 0)
1634                 return 0;
1635
1636         bits = vmalloc(sizeof(*bits) * args->count);
1637         if (!bits)
1638                 return -ENOMEM;
1639
1640         r = copy_from_user(bits, (void __user *)args->values, args->count);
1641         if (r) {
1642                 r = -EFAULT;
1643                 goto out;
1644         }
1645
1646         down_read(&kvm->mm->mmap_sem);
1647         srcu_idx = srcu_read_lock(&kvm->srcu);
1648         for (i = 0; i < args->count; i++) {
1649                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1650                 if (kvm_is_error_hva(hva)) {
1651                         r = -EFAULT;
1652                         break;
1653                 }
1654
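                     /* shift the value into the PGSTE position; the mask limits which bits get set */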
1655                 pgstev = bits[i];
1656                 pgstev = pgstev << 24;
1657                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1658                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1659         }
1660         srcu_read_unlock(&kvm->srcu, srcu_idx);
1661         up_read(&kvm->mm->mmap_sem);
1662
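             /* setting attributes implies that CMMA is in use, so record it in the mm context */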
1663         if (!kvm->mm->context.use_cmma) {
1664                 down_write(&kvm->mm->mmap_sem);
1665                 kvm->mm->context.use_cmma = 1;
1666                 up_write(&kvm->mm->mmap_sem);
1667         }
1668 out:
1669         vfree(bits);
1670         return r;
1671 }
1672
1673 long kvm_arch_vm_ioctl(struct file *filp,
1674                        unsigned int ioctl, unsigned long arg)
1675 {
1676         struct kvm *kvm = filp->private_data;
1677         void __user *argp = (void __user *)arg;
1678         struct kvm_device_attr attr;
1679         int r;
1680
1681         switch (ioctl) {
1682         case KVM_S390_INTERRUPT: {
1683                 struct kvm_s390_interrupt s390int;
1684
1685                 r = -EFAULT;
1686                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1687                         break;
1688                 r = kvm_s390_inject_vm(kvm, &s390int);
1689                 break;
1690         }
1691         case KVM_ENABLE_CAP: {
1692                 struct kvm_enable_cap cap;
1693                 r = -EFAULT;
1694                 if (copy_from_user(&cap, argp, sizeof(cap)))
1695                         break;
1696                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1697                 break;
1698         }
1699         case KVM_CREATE_IRQCHIP: {
1700                 struct kvm_irq_routing_entry routing;
1701
1702                 r = -EINVAL;
1703                 if (kvm->arch.use_irqchip) {
1704                         /* Set up dummy routing. */
1705                         memset(&routing, 0, sizeof(routing));
1706                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1707                 }
1708                 break;
1709         }
1710         case KVM_SET_DEVICE_ATTR: {
1711                 r = -EFAULT;
1712                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1713                         break;
1714                 r = kvm_s390_vm_set_attr(kvm, &attr);
1715                 break;
1716         }
1717         case KVM_GET_DEVICE_ATTR: {
1718                 r = -EFAULT;
1719                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1720                         break;
1721                 r = kvm_s390_vm_get_attr(kvm, &attr);
1722                 break;
1723         }
1724         case KVM_HAS_DEVICE_ATTR: {
1725                 r = -EFAULT;
1726                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1727                         break;
1728                 r = kvm_s390_vm_has_attr(kvm, &attr);
1729                 break;
1730         }
1731         case KVM_S390_GET_SKEYS: {
1732                 struct kvm_s390_skeys args;
1733
1734                 r = -EFAULT;
1735                 if (copy_from_user(&args, argp,
1736                                    sizeof(struct kvm_s390_skeys)))
1737                         break;
1738                 r = kvm_s390_get_skeys(kvm, &args);
1739                 break;
1740         }
1741         case KVM_S390_SET_SKEYS: {
1742                 struct kvm_s390_skeys args;
1743
1744                 r = -EFAULT;
1745                 if (copy_from_user(&args, argp,
1746                                    sizeof(struct kvm_s390_skeys)))
1747                         break;
1748                 r = kvm_s390_set_skeys(kvm, &args);
1749                 break;
1750         }
1751         case KVM_S390_GET_CMMA_BITS: {
1752                 struct kvm_s390_cmma_log args;
1753
1754                 r = -EFAULT;
1755                 if (copy_from_user(&args, argp, sizeof(args)))
1756                         break;
1757                 mutex_lock(&kvm->slots_lock);
1758                 r = kvm_s390_get_cmma_bits(kvm, &args);
1759                 mutex_unlock(&kvm->slots_lock);
1760                 if (!r) {
1761                         r = copy_to_user(argp, &args, sizeof(args));
1762                         if (r)
1763                                 r = -EFAULT;
1764                 }
1765                 break;
1766         }
1767         case KVM_S390_SET_CMMA_BITS: {
1768                 struct kvm_s390_cmma_log args;
1769
1770                 r = -EFAULT;
1771                 if (copy_from_user(&args, argp, sizeof(args)))
1772                         break;
1773                 mutex_lock(&kvm->slots_lock);
1774                 r = kvm_s390_set_cmma_bits(kvm, &args);
1775                 mutex_unlock(&kvm->slots_lock);
1776                 break;
1777         }
1778         default:
1779                 r = -ENOTTY;
1780         }
1781
1782         return r;
1783 }
1784
1785 static int kvm_s390_query_ap_config(u8 *config)
1786 {
1787         u32 fcn_code = 0x04000000UL;
1788         u32 cc = 0;
1789
1790         memset(config, 0, 128);
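             /* the function code goes into register 0 and the buffer address into register 2, as PQAP(QCI) expects */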
1791         asm volatile(
1792                 "lgr 0,%1\n"
1793                 "lgr 2,%2\n"
1794                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1795                 "0: ipm %0\n"
1796                 "srl %0,28\n"
1797                 "1:\n"
1798                 EX_TABLE(0b, 1b)
1799                 : "+r" (cc)
1800                 : "r" (fcn_code), "r" (config)
1801                 : "cc", "0", "2", "memory"
1802         );
1803
1804         return cc;
1805 }
1806
1807 static int kvm_s390_apxa_installed(void)
1808 {
1809         u8 config[128];
1810         int cc;
1811
1812         if (test_facility(12)) {
1813                 cc = kvm_s390_query_ap_config(config);
1814
1815                 if (cc)
1816                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1817                 else
1818                         return config[0] & 0x40;
1819         }
1820
1821         return 0;
1822 }
1823
1824 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1825 {
1826         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1827
1828         if (kvm_s390_apxa_installed())
1829                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1830         else
1831                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1832 }
1833
1834 static u64 kvm_s390_get_initial_cpuid(void)
1835 {
1836         struct cpuid cpuid;
1837
1838         get_cpu_id(&cpuid);
1839         cpuid.version = 0xff;
1840         return *((u64 *) &cpuid);
1841 }
1842
1843 static void kvm_s390_crypto_init(struct kvm *kvm)
1844 {
1845         if (!test_kvm_facility(kvm, 76))
1846                 return;
1847
1848         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1849         kvm_s390_set_crycb_format(kvm);
1850
1851         /* Enable AES/DEA protected key functions by default */
1852         kvm->arch.crypto.aes_kw = 1;
1853         kvm->arch.crypto.dea_kw = 1;
1854         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1855                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1856         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1857                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1858 }
1859
1860 static void sca_dispose(struct kvm *kvm)
1861 {
1862         if (kvm->arch.use_esca)
1863                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1864         else
1865                 free_page((unsigned long)(kvm->arch.sca));
1866         kvm->arch.sca = NULL;
1867 }
1868
1869 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1870 {
1871         gfp_t alloc_flags = GFP_KERNEL;
1872         int i, rc;
1873         char debug_name[16];
1874         static unsigned long sca_offset;
1875
1876         rc = -EINVAL;
1877 #ifdef CONFIG_KVM_S390_UCONTROL
1878         if (type & ~KVM_VM_S390_UCONTROL)
1879                 goto out_err;
1880         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1881                 goto out_err;
1882 #else
1883         if (type)
1884                 goto out_err;
1885 #endif
1886
1887         rc = s390_enable_sie();
1888         if (rc)
1889                 goto out_err;
1890
1891         rc = -ENOMEM;
1892
1893         kvm->arch.use_esca = 0; /* start with basic SCA */
1894         if (!sclp.has_64bscao)
1895                 alloc_flags |= GFP_DMA;
1896         rwlock_init(&kvm->arch.sca_lock);
1897         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1898         if (!kvm->arch.sca)
1899                 goto out_err;
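             /* stagger SCA placement within the page in 16-byte steps across VMs */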
1900         spin_lock(&kvm_lock);
1901         sca_offset += 16;
1902         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1903                 sca_offset = 0;
1904         kvm->arch.sca = (struct bsca_block *)
1905                         ((char *) kvm->arch.sca + sca_offset);
1906         spin_unlock(&kvm_lock);
1907
1908         sprintf(debug_name, "kvm-%u", current->pid);
1909
1910         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1911         if (!kvm->arch.dbf)
1912                 goto out_err;
1913
1914         kvm->arch.sie_page2 =
1915              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1916         if (!kvm->arch.sie_page2)
1917                 goto out_err;
1918
1919         /* Populate the facility mask initially. */
1920         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1921                sizeof(S390_lowcore.stfle_fac_list));
1922         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1923                 if (i < kvm_s390_fac_list_mask_size())
1924                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1925                 else
1926                         kvm->arch.model.fac_mask[i] = 0UL;
1927         }
1928
1929         /* Populate the facility list initially. */
1930         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1931         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1932                S390_ARCH_FAC_LIST_SIZE_BYTE);
1933
1934         /* we are always in czam mode - even on pre z14 machines */
1935         set_kvm_facility(kvm->arch.model.fac_mask, 138);
1936         set_kvm_facility(kvm->arch.model.fac_list, 138);
1937         /* we emulate STHYI in kvm */
1938         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1939         set_kvm_facility(kvm->arch.model.fac_list, 74);
1940         if (MACHINE_HAS_TLB_GUEST) {
1941                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
1942                 set_kvm_facility(kvm->arch.model.fac_list, 147);
1943         }
1944
1945         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1946         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1947
1948         kvm_s390_crypto_init(kvm);
1949
1950         mutex_init(&kvm->arch.float_int.ais_lock);
1951         kvm->arch.float_int.simm = 0;
1952         kvm->arch.float_int.nimm = 0;
1953         spin_lock_init(&kvm->arch.float_int.lock);
1954         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1955                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1956         init_waitqueue_head(&kvm->arch.ipte_wq);
1957         mutex_init(&kvm->arch.ipte_mutex);
1958
1959         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1960         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1961
1962         if (type & KVM_VM_S390_UCONTROL) {
1963                 kvm->arch.gmap = NULL;
1964                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1965         } else {
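                     /* the guest memory limit is bounded by both the host address space and hamax */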
1966                 if (sclp.hamax == U64_MAX)
1967                         kvm->arch.mem_limit = TASK_SIZE_MAX;
1968                 else
1969                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1970                                                     sclp.hamax + 1);
1971                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1972                 if (!kvm->arch.gmap)
1973                         goto out_err;
1974                 kvm->arch.gmap->private = kvm;
1975                 kvm->arch.gmap->pfault_enabled = 0;
1976         }
1977
1978         kvm->arch.css_support = 0;
1979         kvm->arch.use_irqchip = 0;
1980         kvm->arch.epoch = 0;
1981
1982         spin_lock_init(&kvm->arch.start_stop_lock);
1983         kvm_s390_vsie_init(kvm);
1984         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1985
1986         return 0;
1987 out_err:
1988         free_page((unsigned long)kvm->arch.sie_page2);
1989         debug_unregister(kvm->arch.dbf);
1990         sca_dispose(kvm);
1991         KVM_EVENT(3, "creation of vm failed: %d", rc);
1992         return rc;
1993 }
1994
1995 bool kvm_arch_has_vcpu_debugfs(void)
1996 {
1997         return false;
1998 }
1999
2000 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2001 {
2002         return 0;
2003 }
2004
2005 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2006 {
2007         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2008         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2009         kvm_s390_clear_local_irqs(vcpu);
2010         kvm_clear_async_pf_completion_queue(vcpu);
2011         if (!kvm_is_ucontrol(vcpu->kvm))
2012                 sca_del_vcpu(vcpu);
2013
2014         if (kvm_is_ucontrol(vcpu->kvm))
2015                 gmap_remove(vcpu->arch.gmap);
2016
2017         if (vcpu->kvm->arch.use_cmma)
2018                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2019         free_page((unsigned long)(vcpu->arch.sie_block));
2020
2021         kvm_vcpu_uninit(vcpu);
2022         kmem_cache_free(kvm_vcpu_cache, vcpu);
2023 }
2024
2025 static void kvm_free_vcpus(struct kvm *kvm)
2026 {
2027         unsigned int i;
2028         struct kvm_vcpu *vcpu;
2029
2030         kvm_for_each_vcpu(i, vcpu, kvm)
2031                 kvm_arch_vcpu_destroy(vcpu);
2032
2033         mutex_lock(&kvm->lock);
2034         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2035                 kvm->vcpus[i] = NULL;
2036
2037         atomic_set(&kvm->online_vcpus, 0);
2038         mutex_unlock(&kvm->lock);
2039 }
2040
2041 void kvm_arch_destroy_vm(struct kvm *kvm)
2042 {
2043         kvm_free_vcpus(kvm);
2044         sca_dispose(kvm);
2045         debug_unregister(kvm->arch.dbf);
2046         free_page((unsigned long)kvm->arch.sie_page2);
2047         if (!kvm_is_ucontrol(kvm))
2048                 gmap_remove(kvm->arch.gmap);
2049         kvm_s390_destroy_adapters(kvm);
2050         kvm_s390_clear_float_irqs(kvm);
2051         kvm_s390_vsie_destroy(kvm);
2052         if (kvm->arch.migration_state) {
2053                 vfree(kvm->arch.migration_state->pgste_bitmap);
2054                 kfree(kvm->arch.migration_state);
2055         }
2056         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2057 }
2058
2059 /* Section: vcpu related */
2060 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2061 {
2062         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2063         if (!vcpu->arch.gmap)
2064                 return -ENOMEM;
2065         vcpu->arch.gmap->private = vcpu->kvm;
2066
2067         return 0;
2068 }
2069
2070 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2071 {
2072         if (!kvm_s390_use_sca_entries())
2073                 return;
2074         read_lock(&vcpu->kvm->arch.sca_lock);
2075         if (vcpu->kvm->arch.use_esca) {
2076                 struct esca_block *sca = vcpu->kvm->arch.sca;
2077
2078                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2079                 sca->cpu[vcpu->vcpu_id].sda = 0;
2080         } else {
2081                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2082
2083                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2084                 sca->cpu[vcpu->vcpu_id].sda = 0;
2085         }
2086         read_unlock(&vcpu->kvm->arch.sca_lock);
2087 }
2088
2089 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2090 {
2091         if (!kvm_s390_use_sca_entries()) {
2092                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2093
2094                 /* we still need the basic sca for the ipte control */
2095                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2096                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2097         }
2098         read_lock(&vcpu->kvm->arch.sca_lock);
2099         if (vcpu->kvm->arch.use_esca) {
2100                 struct esca_block *sca = vcpu->kvm->arch.sca;
2101
2102                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2103                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2104                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2105                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2106                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2107         } else {
2108                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2109
2110                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2111                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2112                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2113                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2114         }
2115         read_unlock(&vcpu->kvm->arch.sca_lock);
2116 }
2117
2118 /* Basic SCA to Extended SCA data copy routines */
2119 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2120 {
2121         d->sda = s->sda;
2122         d->sigp_ctrl.c = s->sigp_ctrl.c;
2123         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2124 }
2125
2126 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2127 {
2128         int i;
2129
2130         d->ipte_control = s->ipte_control;
2131         d->mcn[0] = s->mcn;
2132         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2133                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2134 }
2135
2136 static int sca_switch_to_extended(struct kvm *kvm)
2137 {
2138         struct bsca_block *old_sca = kvm->arch.sca;
2139         struct esca_block *new_sca;
2140         struct kvm_vcpu *vcpu;
2141         unsigned int vcpu_idx;
2142         u32 scaol, scaoh;
2143
2144         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2145         if (!new_sca)
2146                 return -ENOMEM;
2147
2148         scaoh = (u32)((u64)(new_sca) >> 32);
2149         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2150
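             /* kick all vcpus out of SIE and hold the SCA lock while the SCA is replaced */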
2151         kvm_s390_vcpu_block_all(kvm);
2152         write_lock(&kvm->arch.sca_lock);
2153
2154         sca_copy_b_to_e(new_sca, old_sca);
2155
2156         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2157                 vcpu->arch.sie_block->scaoh = scaoh;
2158                 vcpu->arch.sie_block->scaol = scaol;
2159                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2160         }
2161         kvm->arch.sca = new_sca;
2162         kvm->arch.use_esca = 1;
2163
2164         write_unlock(&kvm->arch.sca_lock);
2165         kvm_s390_vcpu_unblock_all(kvm);
2166
2167         free_page((unsigned long)old_sca);
2168
2169         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2170                  old_sca, kvm->arch.sca);
2171         return 0;
2172 }
2173
2174 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2175 {
2176         int rc;
2177
2178         if (!kvm_s390_use_sca_entries()) {
2179                 if (id < KVM_MAX_VCPUS)
2180                         return true;
2181                 return false;
2182         }
2183         if (id < KVM_S390_BSCA_CPU_SLOTS)
2184                 return true;
2185         if (!sclp.has_esca || !sclp.has_64bscao)
2186                 return false;
2187
2188         mutex_lock(&kvm->lock);
2189         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2190         mutex_unlock(&kvm->lock);
2191
2192         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2193 }
2194
2195 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2196 {
2197         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2198         kvm_clear_async_pf_completion_queue(vcpu);
2199         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2200                                     KVM_SYNC_GPRS |
2201                                     KVM_SYNC_ACRS |
2202                                     KVM_SYNC_CRS |
2203                                     KVM_SYNC_ARCH0 |
2204                                     KVM_SYNC_PFAULT;
2205         kvm_s390_set_prefix(vcpu, 0);
2206         if (test_kvm_facility(vcpu->kvm, 64))
2207                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2208         if (test_kvm_facility(vcpu->kvm, 82))
2209                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2210         if (test_kvm_facility(vcpu->kvm, 133))
2211                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2212         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2213          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2214          */
2215         if (MACHINE_HAS_VX)
2216                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2217         else
2218                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2219
2220         if (kvm_is_ucontrol(vcpu->kvm))
2221                 return __kvm_ucontrol_vcpu_init(vcpu);
2222
2223         return 0;
2224 }
2225
2226 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2227 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2228 {
2229         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2230         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2231         vcpu->arch.cputm_start = get_tod_clock_fast();
2232         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2233 }
2234
2235 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2236 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2237 {
2238         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2239         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2240         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2241         vcpu->arch.cputm_start = 0;
2242         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2243 }
2244
2245 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2246 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2247 {
2248         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2249         vcpu->arch.cputm_enabled = true;
2250         __start_cpu_timer_accounting(vcpu);
2251 }
2252
2253 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2254 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2255 {
2256         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2257         __stop_cpu_timer_accounting(vcpu);
2258         vcpu->arch.cputm_enabled = false;
2259 }
2260
2261 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2262 {
2263         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2264         __enable_cpu_timer_accounting(vcpu);
2265         preempt_enable();
2266 }
2267
2268 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2269 {
2270         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2271         __disable_cpu_timer_accounting(vcpu);
2272         preempt_enable();
2273 }
2274
2275 /* set the cpu timer - may only be called from the VCPU thread itself */
2276 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2277 {
2278         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2279         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2280         if (vcpu->arch.cputm_enabled)
2281                 vcpu->arch.cputm_start = get_tod_clock_fast();
2282         vcpu->arch.sie_block->cputm = cputm;
2283         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2284         preempt_enable();
2285 }
2286
2287 /* update and get the cpu timer - can also be called from other VCPU threads */
2288 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2289 {
2290         unsigned int seq;
2291         __u64 value;
2292
2293         if (unlikely(!vcpu->arch.cputm_enabled))
2294                 return vcpu->arch.sie_block->cputm;
2295
2296         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2297         do {
2298                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2299                 /*
2300                  * If the writer would ever execute a read in the critical
2301                  * section, e.g. in irq context, we have a deadlock.
2302                  */
2303                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2304                 value = vcpu->arch.sie_block->cputm;
2305                 /* if cputm_start is 0, accounting is being started/stopped */
2306                 if (likely(vcpu->arch.cputm_start))
2307                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2308         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2309         preempt_enable();
2310         return value;
2311 }
2312
2313 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2314 {
2315
2316         gmap_enable(vcpu->arch.enabled_gmap);
2317         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2318         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2319                 __start_cpu_timer_accounting(vcpu);
2320         vcpu->cpu = cpu;
2321 }
2322
2323 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2324 {
2325         vcpu->cpu = -1;
2326         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2327                 __stop_cpu_timer_accounting(vcpu);
2328         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2329         vcpu->arch.enabled_gmap = gmap_get_enabled();
2330         gmap_disable(vcpu->arch.enabled_gmap);
2331
2332 }
2333
2334 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2335 {
2336         /* this equals the initial cpu reset in the PoP, but we don't switch to ESA */
2337         vcpu->arch.sie_block->gpsw.mask = 0UL;
2338         vcpu->arch.sie_block->gpsw.addr = 0UL;
2339         kvm_s390_set_prefix(vcpu, 0);
2340         kvm_s390_set_cpu_timer(vcpu, 0);
2341         vcpu->arch.sie_block->ckc       = 0UL;
2342         vcpu->arch.sie_block->todpr     = 0;
2343         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2344         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2345         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2346         /* make sure the new fpc will be lazily loaded */
2347         save_fpu_regs();
2348         current->thread.fpu.fpc = 0;
2349         vcpu->arch.sie_block->gbea = 1;
2350         vcpu->arch.sie_block->pp = 0;
2351         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2352         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2353         kvm_clear_async_pf_completion_queue(vcpu);
2354         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2355                 kvm_s390_vcpu_stop(vcpu);
2356         kvm_s390_clear_local_irqs(vcpu);
2357 }
2358
2359 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2360 {
2361         mutex_lock(&vcpu->kvm->lock);
2362         preempt_disable();
2363         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2364         preempt_enable();
2365         mutex_unlock(&vcpu->kvm->lock);
2366         if (!kvm_is_ucontrol(vcpu->kvm)) {
2367                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2368                 sca_add_vcpu(vcpu);
2369         }
2370         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2371                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2372         /* make vcpu_load load the right gmap on the first trigger */
2373         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2374 }
2375
2376 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2377 {
2378         if (!test_kvm_facility(vcpu->kvm, 76))
2379                 return;
2380
2381         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2382
2383         if (vcpu->kvm->arch.crypto.aes_kw)
2384                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2385         if (vcpu->kvm->arch.crypto.dea_kw)
2386                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2387
2388         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2389 }
2390
2391 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2392 {
2393         free_page(vcpu->arch.sie_block->cbrlo);
2394         vcpu->arch.sie_block->cbrlo = 0;
2395 }
2396
2397 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2398 {
2399         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2400         if (!vcpu->arch.sie_block->cbrlo)
2401                 return -ENOMEM;
2402
2403         vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2404         return 0;
2405 }
2406
2407 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2408 {
2409         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2410
2411         vcpu->arch.sie_block->ibc = model->ibc;
2412         if (test_kvm_facility(vcpu->kvm, 7))
2413                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2414 }
2415
2416 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2417 {
2418         int rc = 0;
2419
2420         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2421                                                     CPUSTAT_SM |
2422                                                     CPUSTAT_STOPPED);
2423
2424         if (test_kvm_facility(vcpu->kvm, 78))
2425                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2426         else if (test_kvm_facility(vcpu->kvm, 8))
2427                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2428
2429         kvm_s390_vcpu_setup_model(vcpu);
2430
2431         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2432         if (MACHINE_HAS_ESOP)
2433                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2434         if (test_kvm_facility(vcpu->kvm, 9))
2435                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2436         if (test_kvm_facility(vcpu->kvm, 73))
2437                 vcpu->arch.sie_block->ecb |= ECB_TE;
2438
2439         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2440                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2441         if (test_kvm_facility(vcpu->kvm, 130))
2442                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2443         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2444         if (sclp.has_cei)
2445                 vcpu->arch.sie_block->eca |= ECA_CEI;
2446         if (sclp.has_ib)
2447                 vcpu->arch.sie_block->eca |= ECA_IB;
2448         if (sclp.has_siif)
2449                 vcpu->arch.sie_block->eca |= ECA_SII;
2450         if (sclp.has_sigpif)
2451                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2452         if (test_kvm_facility(vcpu->kvm, 129)) {
2453                 vcpu->arch.sie_block->eca |= ECA_VX;
2454                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2455         }
2456         if (test_kvm_facility(vcpu->kvm, 139))
2457                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2458
2459         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2460                                         | SDNXC;
2461         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2462
2463         if (sclp.has_kss)
2464                 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2465         else
2466                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2467
2468         if (vcpu->kvm->arch.use_cmma) {
2469                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2470                 if (rc)
2471                         return rc;
2472         }
2473         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2474         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2475
2476         kvm_s390_vcpu_crypto_setup(vcpu);
2477
2478         return rc;
2479 }
2480
2481 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2482                                       unsigned int id)
2483 {
2484         struct kvm_vcpu *vcpu;
2485         struct sie_page *sie_page;
2486         int rc = -EINVAL;
2487
2488         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2489                 goto out;
2490
2491         rc = -ENOMEM;
2492
2493         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2494         if (!vcpu)
2495                 goto out;
2496
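             /* the SIE block plus its satellite data (e.g. the ITDB) must fit in exactly one page */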
2497         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2498         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2499         if (!sie_page)
2500                 goto out_free_cpu;
2501
2502         vcpu->arch.sie_block = &sie_page->sie_block;
2503         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2504
2505         /* the real guest size will always be smaller than msl */
2506         vcpu->arch.sie_block->mso = 0;
2507         vcpu->arch.sie_block->msl = sclp.hamax;
2508
2509         vcpu->arch.sie_block->icpua = id;
2510         spin_lock_init(&vcpu->arch.local_int.lock);
2511         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2512         vcpu->arch.local_int.wq = &vcpu->wq;
2513         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2514         seqcount_init(&vcpu->arch.cputm_seqcount);
2515
2516         rc = kvm_vcpu_init(vcpu, kvm, id);
2517         if (rc)
2518                 goto out_free_sie_block;
2519         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2520                  vcpu->arch.sie_block);
2521         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2522
2523         return vcpu;
2524 out_free_sie_block:
2525         free_page((unsigned long)(vcpu->arch.sie_block));
2526 out_free_cpu:
2527         kmem_cache_free(kvm_vcpu_cache, vcpu);
2528 out:
2529         return ERR_PTR(rc);
2530 }
2531
2532 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2533 {
2534         return kvm_s390_vcpu_has_irq(vcpu, 0);
2535 }
2536
2537 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2538 {
2539         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2540 }
2541
2542 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2543 {
2544         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2545         exit_sie(vcpu);
2546 }
2547
2548 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2549 {
2550         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2551 }
2552
2553 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2554 {
2555         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2556         exit_sie(vcpu);
2557 }
2558
2559 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2560 {
2561         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2562 }
2563
2564 /*
2565  * Kick a guest cpu out of SIE and wait until SIE is not running.
2566  * If the CPU is not running (e.g. waiting as idle) the function will
2567  * return immediately. */
2568 void exit_sie(struct kvm_vcpu *vcpu)
2569 {
2570         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2571         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2572                 cpu_relax();
2573 }
2574
2575 /* Kick a guest cpu out of SIE to process a request synchronously */
2576 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2577 {
2578         kvm_make_request(req, vcpu);
2579         kvm_s390_vcpu_request(vcpu);
2580 }
2581
2582 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2583                               unsigned long end)
2584 {
2585         struct kvm *kvm = gmap->private;
2586         struct kvm_vcpu *vcpu;
2587         unsigned long prefix;
2588         int i;
2589
2590         if (gmap_is_shadow(gmap))
2591                 return;
2592         if (start >= 1UL << 31)
2593                 /* We are only interested in prefix pages */
2594                 return;
2595         kvm_for_each_vcpu(i, vcpu, kvm) {
2596                 /* match against both prefix pages */
2597                 prefix = kvm_s390_get_prefix(vcpu);
2598                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2599                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2600                                    start, end);
2601                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2602                 }
2603         }
2604 }
2605
2606 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2607 {
2608         /* kvm common code refers to this, but never calls it */
2609         BUG();
2610         return 0;
2611 }
2612
2613 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2614                                            struct kvm_one_reg *reg)
2615 {
2616         int r = -EINVAL;
2617
2618         switch (reg->id) {
2619         case KVM_REG_S390_TODPR:
2620                 r = put_user(vcpu->arch.sie_block->todpr,
2621                              (u32 __user *)reg->addr);
2622                 break;
2623         case KVM_REG_S390_EPOCHDIFF:
2624                 r = put_user(vcpu->arch.sie_block->epoch,
2625                              (u64 __user *)reg->addr);
2626                 break;
2627         case KVM_REG_S390_CPU_TIMER:
2628                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2629                              (u64 __user *)reg->addr);
2630                 break;
2631         case KVM_REG_S390_CLOCK_COMP:
2632                 r = put_user(vcpu->arch.sie_block->ckc,
2633                              (u64 __user *)reg->addr);
2634                 break;
2635         case KVM_REG_S390_PFTOKEN:
2636                 r = put_user(vcpu->arch.pfault_token,
2637                              (u64 __user *)reg->addr);
2638                 break;
2639         case KVM_REG_S390_PFCOMPARE:
2640                 r = put_user(vcpu->arch.pfault_compare,
2641                              (u64 __user *)reg->addr);
2642                 break;
2643         case KVM_REG_S390_PFSELECT:
2644                 r = put_user(vcpu->arch.pfault_select,
2645                              (u64 __user *)reg->addr);
2646                 break;
2647         case KVM_REG_S390_PP:
2648                 r = put_user(vcpu->arch.sie_block->pp,
2649                              (u64 __user *)reg->addr);
2650                 break;
2651         case KVM_REG_S390_GBEA:
2652                 r = put_user(vcpu->arch.sie_block->gbea,
2653                              (u64 __user *)reg->addr);
2654                 break;
2655         default:
2656                 break;
2657         }
2658
2659         return r;
2660 }
2661
2662 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2663                                            struct kvm_one_reg *reg)
2664 {
2665         int r = -EINVAL;
2666         __u64 val;
2667
2668         switch (reg->id) {
2669         case KVM_REG_S390_TODPR:
2670                 r = get_user(vcpu->arch.sie_block->todpr,
2671                              (u32 __user *)reg->addr);
2672                 break;
2673         case KVM_REG_S390_EPOCHDIFF:
2674                 r = get_user(vcpu->arch.sie_block->epoch,
2675                              (u64 __user *)reg->addr);
2676                 break;
2677         case KVM_REG_S390_CPU_TIMER:
2678                 r = get_user(val, (u64 __user *)reg->addr);
2679                 if (!r)
2680                         kvm_s390_set_cpu_timer(vcpu, val);
2681                 break;
2682         case KVM_REG_S390_CLOCK_COMP:
2683                 r = get_user(vcpu->arch.sie_block->ckc,
2684                              (u64 __user *)reg->addr);
2685                 break;
2686         case KVM_REG_S390_PFTOKEN:
2687                 r = get_user(vcpu->arch.pfault_token,
2688                              (u64 __user *)reg->addr);
2689                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2690                         kvm_clear_async_pf_completion_queue(vcpu);
2691                 break;
2692         case KVM_REG_S390_PFCOMPARE:
2693                 r = get_user(vcpu->arch.pfault_compare,
2694                              (u64 __user *)reg->addr);
2695                 break;
2696         case KVM_REG_S390_PFSELECT:
2697                 r = get_user(vcpu->arch.pfault_select,
2698                              (u64 __user *)reg->addr);
2699                 break;
2700         case KVM_REG_S390_PP:
2701                 r = get_user(vcpu->arch.sie_block->pp,
2702                              (u64 __user *)reg->addr);
2703                 break;
2704         case KVM_REG_S390_GBEA:
2705                 r = get_user(vcpu->arch.sie_block->gbea,
2706                              (u64 __user *)reg->addr);
2707                 break;
2708         default:
2709                 break;
2710         }
2711
2712         return r;
2713 }
2714
2715 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2716 {
2717         kvm_s390_vcpu_initial_reset(vcpu);
2718         return 0;
2719 }
2720
2721 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2722 {
2723         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2724         return 0;
2725 }
2726
2727 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2728 {
2729         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2730         return 0;
2731 }
2732
2733 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2734                                   struct kvm_sregs *sregs)
2735 {
2736         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2737         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2738         return 0;
2739 }
2740
2741 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2742                                   struct kvm_sregs *sregs)
2743 {
2744         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2745         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2746         return 0;
2747 }
2748
2749 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2750 {
2751         if (test_fp_ctl(fpu->fpc))
2752                 return -EINVAL;
2753         vcpu->run->s.regs.fpc = fpu->fpc;
2754         if (MACHINE_HAS_VX)
2755                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2756                                  (freg_t *) fpu->fprs);
2757         else
2758                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2759         return 0;
2760 }
2761
2762 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2763 {
2764         /* make sure we have the latest values */
2765         save_fpu_regs();
2766         if (MACHINE_HAS_VX)
2767                 convert_vx_to_fp((freg_t *) fpu->fprs,
2768                                  (__vector128 *) vcpu->run->s.regs.vrs);
2769         else
2770                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2771         fpu->fpc = vcpu->run->s.regs.fpc;
2772         return 0;
2773 }
2774
2775 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2776 {
2777         int rc = 0;
2778
2779         if (!is_vcpu_stopped(vcpu))
2780                 rc = -EBUSY;
2781         else {
2782                 vcpu->run->psw_mask = psw.mask;
2783                 vcpu->run->psw_addr = psw.addr;
2784         }
2785         return rc;
2786 }
2787
2788 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2789                                   struct kvm_translation *tr)
2790 {
2791         return -EINVAL; /* not implemented yet */
2792 }
2793
2794 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2795                               KVM_GUESTDBG_USE_HW_BP | \
2796                               KVM_GUESTDBG_ENABLE)
2797
2798 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2799                                         struct kvm_guest_debug *dbg)
2800 {
2801         int rc = 0;
2802
2803         vcpu->guest_debug = 0;
2804         kvm_s390_clear_bp_data(vcpu);
2805
2806         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2807                 return -EINVAL;
2808         if (!sclp.has_gpere)
2809                 return -EINVAL;
2810
2811         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2812                 vcpu->guest_debug = dbg->control;
2813                 /* enforce guest PER */
2814                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2815
2816                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2817                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2818         } else {
2819                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2820                 vcpu->arch.guestdbg.last_bp = 0;
2821         }
2822
2823         if (rc) {
2824                 vcpu->guest_debug = 0;
2825                 kvm_s390_clear_bp_data(vcpu);
2826                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2827         }
2828
2829         return rc;
2830 }
2831
2832 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2833                                     struct kvm_mp_state *mp_state)
2834 {
2835         /* CHECK_STOP and LOAD are not supported yet */
2836         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2837                                        KVM_MP_STATE_OPERATING;
2838 }
2839
2840 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2841                                     struct kvm_mp_state *mp_state)
2842 {
2843         int rc = 0;
2844
2845         /* user space knows about this interface - let it control the state */
2846         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2847
2848         switch (mp_state->mp_state) {
2849         case KVM_MP_STATE_STOPPED:
2850                 kvm_s390_vcpu_stop(vcpu);
2851                 break;
2852         case KVM_MP_STATE_OPERATING:
2853                 kvm_s390_vcpu_start(vcpu);
2854                 break;
2855         case KVM_MP_STATE_LOAD:
2856         case KVM_MP_STATE_CHECK_STOP:
2857                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2858         default:
2859                 rc = -ENXIO;
2860         }
2861
2862         return rc;
2863 }
2864
2865 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2866 {
2867         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2868 }
2869
2870 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2871 {
2872 retry:
2873         kvm_s390_vcpu_request_handled(vcpu);
2874         if (!kvm_request_pending(vcpu))
2875                 return 0;
2876         /*
2877          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2878          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2879          * This ensures that the ipte instruction for this request has
2880          * already finished. We might race against a second unmapper that
2881          * wants to set the blocking bit. Let's just retry the request loop.
2882          */
2883         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2884                 int rc;
2885                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2886                                           kvm_s390_get_prefix(vcpu),
2887                                           PAGE_SIZE * 2, PROT_WRITE);
2888                 if (rc) {
2889                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2890                         return rc;
2891                 }
2892                 goto retry;
2893         }
2894
2895         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2896                 vcpu->arch.sie_block->ihcpu = 0xffff;
2897                 goto retry;
2898         }
2899
2900         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2901                 if (!ibs_enabled(vcpu)) {
2902                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2903                         atomic_or(CPUSTAT_IBS,
2904                                         &vcpu->arch.sie_block->cpuflags);
2905                 }
2906                 goto retry;
2907         }
2908
2909         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2910                 if (ibs_enabled(vcpu)) {
2911                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2912                         atomic_andnot(CPUSTAT_IBS,
2913                                           &vcpu->arch.sie_block->cpuflags);
2914                 }
2915                 goto retry;
2916         }
2917
2918         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2919                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2920                 goto retry;
2921         }
2922
2923         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2924                 /*
2925                  * Disable CMMA virtualization; we will emulate the ESSA
2926                  * instruction manually, in order to provide the additional
2927                  * functionality needed for live migration.
2928                  */
2929                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2930                 goto retry;
2931         }
2932
2933         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2934                 /*
2935                  * Re-enable CMMA virtualization if CMMA is available and
2936                  * was used.
2937                  */
2938                 if (vcpu->kvm->arch.use_cmma &&
2939                     vcpu->kvm->mm->context.use_cmma)
2940                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2941                 goto retry;
2942         }
2943
2944         /* nothing to do, just clear the request */
2945         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2946
2947         return 0;
2948 }
2949
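/*
 * Set the guest TOD clock in the extended (multiple-epoch) format: compute
 * the guest/host epoch difference plus the epoch index and propagate both
 * to all vcpus while they are blocked.
 */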
2950 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2951                                  const struct kvm_s390_vm_tod_clock *gtod)
2952 {
2953         struct kvm_vcpu *vcpu;
2954         struct kvm_s390_tod_clock_ext htod;
2955         int i;
2956
2957         mutex_lock(&kvm->lock);
2958         preempt_disable();
2959
2960         get_tod_clock_ext((char *)&htod);
2961
2962         kvm->arch.epoch = gtod->tod - htod.tod;
2963         kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2964
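        /*
         * The epoch is computed with unsigned wraparound; if the subtraction
         * wrapped (the result is larger than gtod->tod), borrow one from the
         * epoch index.
         */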
2965         if (kvm->arch.epoch > gtod->tod)
2966                 kvm->arch.epdx -= 1;
2967
2968         kvm_s390_vcpu_block_all(kvm);
2969         kvm_for_each_vcpu(i, vcpu, kvm) {
2970                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2971                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
2972         }
2973
2974         kvm_s390_vcpu_unblock_all(kvm);
2975         preempt_enable();
2976         mutex_unlock(&kvm->lock);
2977 }
2978
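/* Set the guest TOD clock (basic format, no epoch index) for all vcpus. */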
2979 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2980 {
2981         struct kvm_vcpu *vcpu;
2982         int i;
2983
2984         mutex_lock(&kvm->lock);
2985         preempt_disable();
2986         kvm->arch.epoch = tod - get_tod_clock();
2987         kvm_s390_vcpu_block_all(kvm);
2988         kvm_for_each_vcpu(i, vcpu, kvm)
2989                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2990         kvm_s390_vcpu_unblock_all(kvm);
2991         preempt_enable();
2992         mutex_unlock(&kvm->lock);
2993 }
2994
2995 /**
2996  * kvm_arch_fault_in_page - fault-in guest page if necessary
2997  * @vcpu: The corresponding virtual cpu
2998  * @gpa: Guest physical address
2999  * @writable: Whether the page should be writable or not
3000  *
3001  * Make sure that a guest page has been faulted-in on the host.
3002  *
3003  * Return: Zero on success, negative error code otherwise.
3004  */
3005 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3006 {
3007         return gmap_fault(vcpu->arch.gmap, gpa,
3008                           writable ? FAULT_FLAG_WRITE : 0);
3009 }
3010
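/*
 * Inject a pfault token into the guest: a PFAULT_INIT interrupt on the vcpu
 * when async handling of a fault starts, or a PFAULT_DONE interrupt on the
 * VM once the page has been made available.
 */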
3011 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3012                                       unsigned long token)
3013 {
3014         struct kvm_s390_interrupt inti;
3015         struct kvm_s390_irq irq;
3016
3017         if (start_token) {
3018                 irq.u.ext.ext_params2 = token;
3019                 irq.type = KVM_S390_INT_PFAULT_INIT;
3020                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3021         } else {
3022                 inti.type = KVM_S390_INT_PFAULT_DONE;
3023                 inti.parm64 = token;
3024                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3025         }
3026 }
3027
3028 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3029                                      struct kvm_async_pf *work)
3030 {
3031         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3032         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3033 }
3034
3035 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3036                                  struct kvm_async_pf *work)
3037 {
3038         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3039         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3040 }
3041
3042 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3043                                struct kvm_async_pf *work)
3044 {
3045         /* s390 will always inject the page directly */
3046 }
3047
3048 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3049 {
3050         /*
3051          * s390 will always inject the page directly,
3052          * but we still want kvm_check_async_pf_completion() to clean up
3053          */
3054         return true;
3055 }
3056
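/*
 * Try to set up async handling of a host page fault on behalf of the guest.
 * Bail out early (so the fault is handled synchronously) if the guest has
 * not enabled pfault handshaking, if its PSW or CR0 state currently blocks
 * the notification, or if an interrupt is already pending.
 */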
3057 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3058 {
3059         hva_t hva;
3060         struct kvm_arch_async_pf arch;
3061         int rc;
3062
3063         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3064                 return 0;
3065         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3066             vcpu->arch.pfault_compare)
3067                 return 0;
3068         if (psw_extint_disabled(vcpu))
3069                 return 0;
3070         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3071                 return 0;
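        /* pfault notifications are gated by the service-signal submask in CR0 */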
3072         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3073                 return 0;
3074         if (!vcpu->arch.gmap->pfault_enabled)
3075                 return 0;
3076
3077         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3078         hva += current->thread.gmap_addr & ~PAGE_MASK;
3079         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3080                 return 0;
3081
3082         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3083         return rc;
3084 }
3085
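/*
 * Work that has to be done before entering SIE: handle completed pfaults,
 * deliver pending interrupts, process KVM requests and, with guest
 * debugging enabled, patch the guest PER registers.
 */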
3086 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3087 {
3088         int rc, cpuflags;
3089
3090         /*
3091          * On s390, notifications for arriving pages are delivered directly
3092          * to the guest, but the housekeeping for completed pfaults is
3093          * handled outside the worker.
3094          */
3095         kvm_check_async_pf_completion(vcpu);
3096
3097         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3098         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3099
3100         if (need_resched())
3101                 schedule();
3102
3103         if (test_cpu_flag(CIF_MCCK_PENDING))
3104                 s390_handle_mcck();
3105
3106         if (!kvm_is_ucontrol(vcpu->kvm)) {
3107                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3108                 if (rc)
3109                         return rc;
3110         }
3111
3112         rc = kvm_s390_handle_requests(vcpu);
3113         if (rc)
3114                 return rc;
3115
3116         if (guestdbg_enabled(vcpu)) {
3117                 kvm_s390_backup_guest_per_regs(vcpu);
3118                 kvm_s390_patch_guest_per_regs(vcpu);
3119         }
3120
3121         vcpu->arch.sie_block->icptcode = 0;
3122         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3123         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3124         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3125
3126         return 0;
3127 }
3128
3129 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3130 {
3131         struct kvm_s390_pgm_info pgm_info = {
3132                 .code = PGM_ADDRESSING,
3133         };
3134         u8 opcode, ilen;
3135         int rc;
3136
3137         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3138         trace_kvm_s390_sie_fault(vcpu);
3139
3140         /*
3141          * We want to inject an addressing exception, which is defined as a
3142          * suppressing or terminating exception. However, since we got here
3143          * via a DAT access exception, the PSW still points to the faulting
3144          * instruction, as DAT exceptions are nullifying. So we have
3145          * to look up the current opcode to get the length of the instruction
3146          * to be able to forward the PSW.
3147          */
3148         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3149         ilen = insn_length(opcode);
3150         if (rc < 0) {
3151                 return rc;
3152         } else if (rc) {
3153                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3154                  * Forward by arbitrary ilc, injection will take care of
3155                  * nullification if necessary.
3156                  */
3157                 pgm_info = vcpu->arch.pgm;
3158                 ilen = 4;
3159         }
3160         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3161         kvm_s390_forward_psw(vcpu, ilen);
3162         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3163 }
3164
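/*
 * Work to be done after leaving SIE: reinject machine checks that hit
 * during the SIE critical section (-EINTR), handle intercepts, resolve
 * host page faults (async pfault or synchronous fault-in), or report the
 * intercept to user space via KVM_EXIT_S390_SIEIC.
 */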
3165 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3166 {
3167         struct mcck_volatile_info *mcck_info;
3168         struct sie_page *sie_page;
3169
3170         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3171                    vcpu->arch.sie_block->icptcode);
3172         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3173
3174         if (guestdbg_enabled(vcpu))
3175                 kvm_s390_restore_guest_per_regs(vcpu);
3176
3177         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3178         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3179
3180         if (exit_reason == -EINTR) {
3181                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3182                 sie_page = container_of(vcpu->arch.sie_block,
3183                                         struct sie_page, sie_block);
3184                 mcck_info = &sie_page->mcck_info;
3185                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3186                 return 0;
3187         }
3188
3189         if (vcpu->arch.sie_block->icptcode > 0) {
3190                 int rc = kvm_handle_sie_intercept(vcpu);
3191
3192                 if (rc != -EOPNOTSUPP)
3193                         return rc;
3194                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3195                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3196                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3197                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3198                 return -EREMOTE;
3199         } else if (exit_reason != -EFAULT) {
3200                 vcpu->stat.exit_null++;
3201                 return 0;
3202         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3203                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3204                 vcpu->run->s390_ucontrol.trans_exc_code =
3205                                                 current->thread.gmap_addr;
3206                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3207                 return -EREMOTE;
3208         } else if (current->thread.gmap_pfault) {
3209                 trace_kvm_s390_major_guest_pfault(vcpu);
3210                 current->thread.gmap_pfault = 0;
3211                 if (kvm_arch_setup_async_pf(vcpu))
3212                         return 0;
3213                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3214         }
3215         return vcpu_post_run_fault_in_sie(vcpu);
3216 }
3217
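/*
 * The main run loop: alternate between pre-run work, executing the guest
 * via SIE and post-run handling until an exit to user space is required
 * or an error occurs.
 */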
3218 static int __vcpu_run(struct kvm_vcpu *vcpu)
3219 {
3220         int rc, exit_reason;
3221
3222         /*
3223          * We try to hold kvm->srcu during most of vcpu_run (except while
3224          * running the guest), so that memslots (and other stuff) are protected
3225          */
3226         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3227
3228         do {
3229                 rc = vcpu_pre_run(vcpu);
3230                 if (rc)
3231                         break;
3232
3233                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3234                 /*
3235                  * As PF_VCPU will be used in the fault handler, there must be
3236                  * no uaccess between guest_enter and guest_exit.
3237                  */
3238                 local_irq_disable();
3239                 guest_enter_irqoff();
3240                 __disable_cpu_timer_accounting(vcpu);
3241                 local_irq_enable();
3242                 exit_reason = sie64a(vcpu->arch.sie_block,
3243                                      vcpu->run->s.regs.gprs);
3244                 local_irq_disable();
3245                 __enable_cpu_timer_accounting(vcpu);
3246                 guest_exit_irqoff();