arch/s390/kvm/kvm-s390.c (as of "KVM: s390: vsie: cleanup mcck reinjection")
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include "kvm-s390.h"
47 #include "gaccess.h"
48
49 #define KMSG_COMPONENT "kvm-s390"
50 #undef pr_fmt
51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52
53 #define CREATE_TRACE_POINTS
54 #include "trace.h"
55 #include "trace-s390.h"
56
57 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
58 #define LOCAL_IRQS 32
59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60                            (KVM_MAX_VCPUS + LOCAL_IRQS))
61
62 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
63
64 struct kvm_stats_debugfs_item debugfs_entries[] = {
65         { "userspace_handled", VCPU_STAT(exit_userspace) },
66         { "exit_null", VCPU_STAT(exit_null) },
67         { "exit_validity", VCPU_STAT(exit_validity) },
68         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
69         { "exit_external_request", VCPU_STAT(exit_external_request) },
70         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
71         { "exit_instruction", VCPU_STAT(exit_instruction) },
72         { "exit_pei", VCPU_STAT(exit_pei) },
73         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
74         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
75         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
76         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
77         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
78         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
79         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
85         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
86         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
87         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
88         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
89         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
90         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
91         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
92         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
93         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
94         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
95         { "instruction_spx", VCPU_STAT(instruction_spx) },
96         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
97         { "instruction_stap", VCPU_STAT(instruction_stap) },
98         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
99         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
100         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
101         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
102         { "instruction_essa", VCPU_STAT(instruction_essa) },
103         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
104         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
105         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
106         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
107         { "instruction_sie", VCPU_STAT(instruction_sie) },
108         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
109         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
110         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
111         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
112         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
113         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
114         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
115         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
116         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
117         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
118         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
119         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
120         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
121         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
122         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
123         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
124         { "diagnose_10", VCPU_STAT(diagnose_10) },
125         { "diagnose_44", VCPU_STAT(diagnose_44) },
126         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
127         { "diagnose_258", VCPU_STAT(diagnose_258) },
128         { "diagnose_308", VCPU_STAT(diagnose_308) },
129         { "diagnose_500", VCPU_STAT(diagnose_500) },
130         { NULL }
131 };
132
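/*
 * Layout of the 16-byte clock value filled in by get_tod_clock_ext():
 * the epoch index in the first byte, followed by the 64-bit TOD value
 * (see kvm_s390_get_tod_clock_ext() below).
 */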
133 struct kvm_s390_tod_clock_ext {
134         __u8 epoch_idx;
135         __u64 tod;
136         __u8 reserved[7];
137 } __packed;
138
139 /* allow nested virtualization in KVM (if enabled by user space) */
140 static int nested;
141 module_param(nested, int, S_IRUGO);
142 MODULE_PARM_DESC(nested, "Nested virtualization support");
143
144 /* upper facilities limit for kvm */
145 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
146
147 unsigned long kvm_s390_fac_list_mask_size(void)
148 {
149         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
150         return ARRAY_SIZE(kvm_s390_fac_list_mask);
151 }
152
153 /* available cpu features supported by kvm */
154 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
155 /* available subfunctions indicated via query / "test bit" */
156 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
157
158 static struct gmap_notifier gmap_notifier;
159 static struct gmap_notifier vsie_gmap_notifier;
160 debug_info_t *kvm_s390_dbf;
161
162 /* Section: not file related */
163 int kvm_arch_hardware_enable(void)
164 {
165         /* every s390 is virtualization enabled ;-) */
166         return 0;
167 }
168
169 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
170                               unsigned long end);
171
172 /*
173  * This callback is executed during stop_machine(). All CPUs are therefore
174  * temporarily stopped. In order not to change guest behavior, we have to
175  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
176  * so a CPU won't be stopped while calculating with the epoch.
177  */
178 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
179                           void *v)
180 {
181         struct kvm *kvm;
182         struct kvm_vcpu *vcpu;
183         int i;
184         unsigned long long *delta = v;
185
186         list_for_each_entry(kvm, &vm_list, vm_list) {
187                 kvm->arch.epoch -= *delta;
188                 kvm_for_each_vcpu(i, vcpu, kvm) {
189                         vcpu->arch.sie_block->epoch -= *delta;
190                         if (vcpu->arch.cputm_enabled)
191                                 vcpu->arch.cputm_start += *delta;
192                         if (vcpu->arch.vsie_block)
193                                 vcpu->arch.vsie_block->epoch -= *delta;
194                 }
195         }
196         return NOTIFY_OK;
197 }
198
199 static struct notifier_block kvm_clock_notifier = {
200         .notifier_call = kvm_clock_sync,
201 };
202
203 int kvm_arch_hardware_setup(void)
204 {
205         gmap_notifier.notifier_call = kvm_gmap_notifier;
206         gmap_register_pte_notifier(&gmap_notifier);
207         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
208         gmap_register_pte_notifier(&vsie_gmap_notifier);
209         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
210                                        &kvm_clock_notifier);
211         return 0;
212 }
213
214 void kvm_arch_hardware_unsetup(void)
215 {
216         gmap_unregister_pte_notifier(&gmap_notifier);
217         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
218         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
219                                          &kvm_clock_notifier);
220 }
221
222 static void allow_cpu_feat(unsigned long nr)
223 {
224         set_bit_inv(nr, kvm_s390_available_cpu_feat);
225 }
226
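/*
 * Use PERFORM LOCKED OPERATION with the test bit (0x100) set to query
 * whether the PLO function code denoted by nr is available; condition
 * code 0 means it is.
 */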
227 static inline int plo_test_bit(unsigned char nr)
228 {
229         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
230         int cc;
231
232         asm volatile(
233                 /* Parameter registers are ignored for "test bit" */
234                 "       plo     0,0,0,0(0)\n"
235                 "       ipm     %0\n"
236                 "       srl     %0,28\n"
237                 : "=d" (cc)
238                 : "d" (r0)
239                 : "cc");
240         return cc == 0;
241 }
242
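/*
 * Probe the host for the subfunctions (PLO, PTFF, CPACF crypto) and the
 * SCLP-indicated SIE features that can be offered to guests. SIE related
 * features are only made available when nested virtualization is enabled.
 */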
243 static void kvm_s390_cpu_feat_init(void)
244 {
245         int i;
246
247         for (i = 0; i < 256; ++i) {
248                 if (plo_test_bit(i))
249                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
250         }
251
252         if (test_facility(28)) /* TOD-clock steering */
253                 ptff(kvm_s390_available_subfunc.ptff,
254                      sizeof(kvm_s390_available_subfunc.ptff),
255                      PTFF_QAF);
256
257         if (test_facility(17)) { /* MSA */
258                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
259                               kvm_s390_available_subfunc.kmac);
260                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
261                               kvm_s390_available_subfunc.kmc);
262                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
263                               kvm_s390_available_subfunc.km);
264                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
265                               kvm_s390_available_subfunc.kimd);
266                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
267                               kvm_s390_available_subfunc.klmd);
268         }
269         if (test_facility(76)) /* MSA3 */
270                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
271                               kvm_s390_available_subfunc.pckmo);
272         if (test_facility(77)) { /* MSA4 */
273                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
274                               kvm_s390_available_subfunc.kmctr);
275                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
276                               kvm_s390_available_subfunc.kmf);
277                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
278                               kvm_s390_available_subfunc.kmo);
279                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
280                               kvm_s390_available_subfunc.pcc);
281         }
282         if (test_facility(57)) /* MSA5 */
283                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
284                               kvm_s390_available_subfunc.ppno);
285
286         if (test_facility(146)) /* MSA8 */
287                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
288                               kvm_s390_available_subfunc.kma);
289
290         if (MACHINE_HAS_ESOP)
291                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
292         /*
293          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
294          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
295          */
296         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
297             !test_facility(3) || !nested)
298                 return;
299         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
300         if (sclp.has_64bscao)
301                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
302         if (sclp.has_siif)
303                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
304         if (sclp.has_gpere)
305                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
306         if (sclp.has_gsls)
307                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
308         if (sclp.has_ib)
309                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
310         if (sclp.has_cei)
311                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
312         if (sclp.has_ibs)
313                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
314         if (sclp.has_kss)
315                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
316         /*
317          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
318          * all skey handling functions read/set the skey from the PGSTE
319          * instead of the real storage key.
320          *
321          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
322          * pages being detected as preserved although they are resident.
323          *
324          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
325          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
326          *
327          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
328          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
329          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
330          *
331          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
332          * cannot easily shadow the SCA because of the ipte lock.
333          */
334 }
335
336 int kvm_arch_init(void *opaque)
337 {
338         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
339         if (!kvm_s390_dbf)
340                 return -ENOMEM;
341
342         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
343                 debug_unregister(kvm_s390_dbf);
344                 return -ENOMEM;
345         }
346
347         kvm_s390_cpu_feat_init();
348
349         /* Register floating interrupt controller interface. */
350         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
351 }
352
353 void kvm_arch_exit(void)
354 {
355         debug_unregister(kvm_s390_dbf);
356 }
357
358 /* Section: device related */
359 long kvm_arch_dev_ioctl(struct file *filp,
360                         unsigned int ioctl, unsigned long arg)
361 {
362         if (ioctl == KVM_S390_ENABLE_SIE)
363                 return s390_enable_sie();
364         return -EINVAL;
365 }
366
367 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
368 {
369         int r;
370
371         switch (ext) {
372         case KVM_CAP_S390_PSW:
373         case KVM_CAP_S390_GMAP:
374         case KVM_CAP_SYNC_MMU:
375 #ifdef CONFIG_KVM_S390_UCONTROL
376         case KVM_CAP_S390_UCONTROL:
377 #endif
378         case KVM_CAP_ASYNC_PF:
379         case KVM_CAP_SYNC_REGS:
380         case KVM_CAP_ONE_REG:
381         case KVM_CAP_ENABLE_CAP:
382         case KVM_CAP_S390_CSS_SUPPORT:
383         case KVM_CAP_IOEVENTFD:
384         case KVM_CAP_DEVICE_CTRL:
385         case KVM_CAP_ENABLE_CAP_VM:
386         case KVM_CAP_S390_IRQCHIP:
387         case KVM_CAP_VM_ATTRIBUTES:
388         case KVM_CAP_MP_STATE:
389         case KVM_CAP_IMMEDIATE_EXIT:
390         case KVM_CAP_S390_INJECT_IRQ:
391         case KVM_CAP_S390_USER_SIGP:
392         case KVM_CAP_S390_USER_STSI:
393         case KVM_CAP_S390_SKEYS:
394         case KVM_CAP_S390_IRQ_STATE:
395         case KVM_CAP_S390_USER_INSTR0:
396         case KVM_CAP_S390_CMMA_MIGRATION:
397         case KVM_CAP_S390_AIS:
398                 r = 1;
399                 break;
400         case KVM_CAP_S390_MEM_OP:
401                 r = MEM_OP_MAX_SIZE;
402                 break;
403         case KVM_CAP_NR_VCPUS:
404         case KVM_CAP_MAX_VCPUS:
405                 r = KVM_S390_BSCA_CPU_SLOTS;
406                 if (!kvm_s390_use_sca_entries())
407                         r = KVM_MAX_VCPUS;
408                 else if (sclp.has_esca && sclp.has_64bscao)
409                         r = KVM_S390_ESCA_CPU_SLOTS;
410                 break;
411         case KVM_CAP_NR_MEMSLOTS:
412                 r = KVM_USER_MEM_SLOTS;
413                 break;
414         case KVM_CAP_S390_COW:
415                 r = MACHINE_HAS_ESOP;
416                 break;
417         case KVM_CAP_S390_VECTOR_REGISTERS:
418                 r = MACHINE_HAS_VX;
419                 break;
420         case KVM_CAP_S390_RI:
421                 r = test_facility(64);
422                 break;
423         case KVM_CAP_S390_GS:
424                 r = test_facility(133);
425                 break;
426         default:
427                 r = 0;
428         }
429         return r;
430 }
431
432 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
433                                         struct kvm_memory_slot *memslot)
434 {
435         gfn_t cur_gfn, last_gfn;
436         unsigned long address;
437         struct gmap *gmap = kvm->arch.gmap;
438
439         /* Loop over all guest pages */
440         last_gfn = memslot->base_gfn + memslot->npages;
441         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
442                 address = gfn_to_hva_memslot(memslot, cur_gfn);
443
444                 if (test_and_clear_guest_dirty(gmap->mm, address))
445                         mark_page_dirty(kvm, cur_gfn);
446                 if (fatal_signal_pending(current))
447                         return;
448                 cond_resched();
449         }
450 }
451
452 /* Section: vm related */
453 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
454
455 /*
456  * Get (and clear) the dirty memory log for a memory slot.
457  */
458 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
459                                struct kvm_dirty_log *log)
460 {
461         int r;
462         unsigned long n;
463         struct kvm_memslots *slots;
464         struct kvm_memory_slot *memslot;
465         int is_dirty = 0;
466
467         if (kvm_is_ucontrol(kvm))
468                 return -EINVAL;
469
470         mutex_lock(&kvm->slots_lock);
471
472         r = -EINVAL;
473         if (log->slot >= KVM_USER_MEM_SLOTS)
474                 goto out;
475
476         slots = kvm_memslots(kvm);
477         memslot = id_to_memslot(slots, log->slot);
478         r = -ENOENT;
479         if (!memslot->dirty_bitmap)
480                 goto out;
481
482         kvm_s390_sync_dirty_log(kvm, memslot);
483         r = kvm_get_dirty_log(kvm, log, &is_dirty);
484         if (r)
485                 goto out;
486
487         /* Clear the dirty log */
488         if (is_dirty) {
489                 n = kvm_dirty_bitmap_bytes(memslot);
490                 memset(memslot->dirty_bitmap, 0, n);
491         }
492         r = 0;
493 out:
494         mutex_unlock(&kvm->slots_lock);
495         return r;
496 }
497
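/* make all vcpus intercept operation exceptions (KVM_CAP_S390_USER_INSTR0) */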
498 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
499 {
500         unsigned int i;
501         struct kvm_vcpu *vcpu;
502
503         kvm_for_each_vcpu(i, vcpu, kvm) {
504                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
505         }
506 }
507
508 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
509 {
510         int r;
511
512         if (cap->flags)
513                 return -EINVAL;
514
515         switch (cap->cap) {
516         case KVM_CAP_S390_IRQCHIP:
517                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
518                 kvm->arch.use_irqchip = 1;
519                 r = 0;
520                 break;
521         case KVM_CAP_S390_USER_SIGP:
522                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
523                 kvm->arch.user_sigp = 1;
524                 r = 0;
525                 break;
526         case KVM_CAP_S390_VECTOR_REGISTERS:
527                 mutex_lock(&kvm->lock);
528                 if (kvm->created_vcpus) {
529                         r = -EBUSY;
530                 } else if (MACHINE_HAS_VX) {
531                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
532                         set_kvm_facility(kvm->arch.model.fac_list, 129);
533                         if (test_facility(134)) {
534                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
535                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
536                         }
537                         if (test_facility(135)) {
538                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
539                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
540                         }
541                         r = 0;
542                 } else
543                         r = -EINVAL;
544                 mutex_unlock(&kvm->lock);
545                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
546                          r ? "(not available)" : "(success)");
547                 break;
548         case KVM_CAP_S390_RI:
549                 r = -EINVAL;
550                 mutex_lock(&kvm->lock);
551                 if (kvm->created_vcpus) {
552                         r = -EBUSY;
553                 } else if (test_facility(64)) {
554                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
555                         set_kvm_facility(kvm->arch.model.fac_list, 64);
556                         r = 0;
557                 }
558                 mutex_unlock(&kvm->lock);
559                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
560                          r ? "(not available)" : "(success)");
561                 break;
562         case KVM_CAP_S390_AIS:
563                 mutex_lock(&kvm->lock);
564                 if (kvm->created_vcpus) {
565                         r = -EBUSY;
566                 } else {
567                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
568                         set_kvm_facility(kvm->arch.model.fac_list, 72);
569                         r = 0;
570                 }
571                 mutex_unlock(&kvm->lock);
572                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
573                          r ? "(not available)" : "(success)");
574                 break;
575         case KVM_CAP_S390_GS:
576                 r = -EINVAL;
577                 mutex_lock(&kvm->lock);
578                 if (atomic_read(&kvm->online_vcpus)) {
579                         r = -EBUSY;
580                 } else if (test_facility(133)) {
581                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
582                         set_kvm_facility(kvm->arch.model.fac_list, 133);
583                         r = 0;
584                 }
585                 mutex_unlock(&kvm->lock);
586                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
587                          r ? "(not available)" : "(success)");
588                 break;
589         case KVM_CAP_S390_USER_STSI:
590                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
591                 kvm->arch.user_stsi = 1;
592                 r = 0;
593                 break;
594         case KVM_CAP_S390_USER_INSTR0:
595                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
596                 kvm->arch.user_instr0 = 1;
597                 icpt_operexc_on_all_vcpus(kvm);
598                 r = 0;
599                 break;
600         default:
601                 r = -EINVAL;
602                 break;
603         }
604         return r;
605 }
606
607 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
608 {
609         int ret;
610
611         switch (attr->attr) {
612         case KVM_S390_VM_MEM_LIMIT_SIZE:
613                 ret = 0;
614                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
615                          kvm->arch.mem_limit);
616                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
617                         ret = -EFAULT;
618                 break;
619         default:
620                 ret = -ENXIO;
621                 break;
622         }
623         return ret;
624 }
625
626 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
627 {
628         int ret;
629         unsigned int idx;
630         switch (attr->attr) {
631         case KVM_S390_VM_MEM_ENABLE_CMMA:
632                 ret = -ENXIO;
633                 if (!sclp.has_cmma)
634                         break;
635
636                 ret = -EBUSY;
637                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
638                 mutex_lock(&kvm->lock);
639                 if (!kvm->created_vcpus) {
640                         kvm->arch.use_cmma = 1;
641                         ret = 0;
642                 }
643                 mutex_unlock(&kvm->lock);
644                 break;
645         case KVM_S390_VM_MEM_CLR_CMMA:
646                 ret = -ENXIO;
647                 if (!sclp.has_cmma)
648                         break;
649                 ret = -EINVAL;
650                 if (!kvm->arch.use_cmma)
651                         break;
652
653                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
654                 mutex_lock(&kvm->lock);
655                 idx = srcu_read_lock(&kvm->srcu);
656                 s390_reset_cmma(kvm->arch.gmap->mm);
657                 srcu_read_unlock(&kvm->srcu, idx);
658                 mutex_unlock(&kvm->lock);
659                 ret = 0;
660                 break;
661         case KVM_S390_VM_MEM_LIMIT_SIZE: {
662                 unsigned long new_limit;
663
664                 if (kvm_is_ucontrol(kvm))
665                         return -EINVAL;
666
667                 if (get_user(new_limit, (u64 __user *)attr->addr))
668                         return -EFAULT;
669
670                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
671                     new_limit > kvm->arch.mem_limit)
672                         return -E2BIG;
673
674                 if (!new_limit)
675                         return -EINVAL;
676
677                 /* gmap_create takes last usable address */
678                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
679                         new_limit -= 1;
680
681                 ret = -EBUSY;
682                 mutex_lock(&kvm->lock);
683                 if (!kvm->created_vcpus) {
684                         /* gmap_create will round the limit up */
685                         struct gmap *new = gmap_create(current->mm, new_limit);
686
687                         if (!new) {
688                                 ret = -ENOMEM;
689                         } else {
690                                 gmap_remove(kvm->arch.gmap);
691                                 new->private = kvm;
692                                 kvm->arch.gmap = new;
693                                 ret = 0;
694                         }
695                 }
696                 mutex_unlock(&kvm->lock);
697                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
698                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
699                          (void *) kvm->arch.gmap->asce);
700                 break;
701         }
702         default:
703                 ret = -ENXIO;
704                 break;
705         }
706         return ret;
707 }
708
709 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
710
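/*
 * Toggle AES/DEA key wrapping for the guest: generate fresh wrapping key
 * masks on enable, clear them on disable, then kick every vcpu out of SIE
 * so the updated crypto setup is reloaded.
 */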
711 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
712 {
713         struct kvm_vcpu *vcpu;
714         int i;
715
716         if (!test_kvm_facility(kvm, 76))
717                 return -EINVAL;
718
719         mutex_lock(&kvm->lock);
720         switch (attr->attr) {
721         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
722                 get_random_bytes(
723                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
724                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
725                 kvm->arch.crypto.aes_kw = 1;
726                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
727                 break;
728         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
729                 get_random_bytes(
730                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
731                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
732                 kvm->arch.crypto.dea_kw = 1;
733                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
734                 break;
735         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
736                 kvm->arch.crypto.aes_kw = 0;
737                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
738                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
739                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
740                 break;
741         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
742                 kvm->arch.crypto.dea_kw = 0;
743                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
744                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
745                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
746                 break;
747         default:
748                 mutex_unlock(&kvm->lock);
749                 return -ENXIO;
750         }
751
752         kvm_for_each_vcpu(i, vcpu, kvm) {
753                 kvm_s390_vcpu_crypto_setup(vcpu);
754                 exit_sie(vcpu);
755         }
756         mutex_unlock(&kvm->lock);
757         return 0;
758 }
759
760 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
761 {
762         int cx;
763         struct kvm_vcpu *vcpu;
764
765         kvm_for_each_vcpu(cx, vcpu, kvm)
766                 kvm_s390_sync_request(req, vcpu);
767 }
768
769 /*
770  * Must be called with kvm->srcu held to avoid races on memslots, and with
771  * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
772  */
773 static int kvm_s390_vm_start_migration(struct kvm *kvm)
774 {
775         struct kvm_s390_migration_state *mgs;
776         struct kvm_memory_slot *ms;
777         /* should be the only one */
778         struct kvm_memslots *slots;
779         unsigned long ram_pages;
780         int slotnr;
781
782         /* migration mode already enabled */
783         if (kvm->arch.migration_state)
784                 return 0;
785
786         slots = kvm_memslots(kvm);
787         if (!slots || !slots->used_slots)
788                 return -EINVAL;
789
790         mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
791         if (!mgs)
792                 return -ENOMEM;
793         kvm->arch.migration_state = mgs;
794
795         if (kvm->arch.use_cmma) {
796                 /*
797                  * Get the last slot. They should be sorted by base_gfn, so the
798                  * last slot is also the one at the end of the address space.
799                  * We have verified above that at least one slot is present.
800                  */
801                 ms = slots->memslots + slots->used_slots - 1;
802                 /* round up so we only use full longs */
803                 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
804                 /* allocate enough bytes to store all the bits */
805                 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
806                 if (!mgs->pgste_bitmap) {
807                         kfree(mgs);
808                         kvm->arch.migration_state = NULL;
809                         return -ENOMEM;
810                 }
811
812                 mgs->bitmap_size = ram_pages;
813                 atomic64_set(&mgs->dirty_pages, ram_pages);
814                 /* mark all the pages in active slots as dirty */
815                 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
816                         ms = slots->memslots + slotnr;
817                         bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
818                 }
819
820                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
821         }
822         return 0;
823 }
824
825 /*
826  * Must be called with kvm->lock to avoid races with ourselves and
827  * kvm_s390_vm_start_migration.
828  */
829 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
830 {
831         struct kvm_s390_migration_state *mgs;
832
833         /* migration mode already disabled */
834         if (!kvm->arch.migration_state)
835                 return 0;
836         mgs = kvm->arch.migration_state;
837         kvm->arch.migration_state = NULL;
838
839         if (kvm->arch.use_cmma) {
840                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
841                 vfree(mgs->pgste_bitmap);
842         }
843         kfree(mgs);
844         return 0;
845 }
846
847 static int kvm_s390_vm_set_migration(struct kvm *kvm,
848                                      struct kvm_device_attr *attr)
849 {
850         int idx, res = -ENXIO;
851
852         mutex_lock(&kvm->lock);
853         switch (attr->attr) {
854         case KVM_S390_VM_MIGRATION_START:
855                 idx = srcu_read_lock(&kvm->srcu);
856                 res = kvm_s390_vm_start_migration(kvm);
857                 srcu_read_unlock(&kvm->srcu, idx);
858                 break;
859         case KVM_S390_VM_MIGRATION_STOP:
860                 res = kvm_s390_vm_stop_migration(kvm);
861                 break;
862         default:
863                 break;
864         }
865         mutex_unlock(&kvm->lock);
866
867         return res;
868 }
869
870 static int kvm_s390_vm_get_migration(struct kvm *kvm,
871                                      struct kvm_device_attr *attr)
872 {
873         u64 mig = (kvm->arch.migration_state != NULL);
874
875         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
876                 return -ENXIO;
877
878         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
879                 return -EFAULT;
880         return 0;
881 }
882
883 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
884 {
885         struct kvm_s390_vm_tod_clock gtod;
886
887         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
888                 return -EFAULT;
889
890         if (test_kvm_facility(kvm, 139))
891                 kvm_s390_set_tod_clock_ext(kvm, &gtod);
892         else if (gtod.epoch_idx == 0)
893                 kvm_s390_set_tod_clock(kvm, gtod.tod);
894         else
895                 return -EINVAL;
896
897         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
898                 gtod.epoch_idx, gtod.tod);
899
900         return 0;
901 }
902
903 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
904 {
905         u8 gtod_high;
906
907         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
908                                            sizeof(gtod_high)))
909                 return -EFAULT;
910
911         if (gtod_high != 0)
912                 return -EINVAL;
913         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
914
915         return 0;
916 }
917
918 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
919 {
920         u64 gtod;
921
922         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
923                 return -EFAULT;
924
925         kvm_s390_set_tod_clock(kvm, gtod);
926         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
927         return 0;
928 }
929
930 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
931 {
932         int ret;
933
934         if (attr->flags)
935                 return -EINVAL;
936
937         switch (attr->attr) {
938         case KVM_S390_VM_TOD_EXT:
939                 ret = kvm_s390_set_tod_ext(kvm, attr);
940                 break;
941         case KVM_S390_VM_TOD_HIGH:
942                 ret = kvm_s390_set_tod_high(kvm, attr);
943                 break;
944         case KVM_S390_VM_TOD_LOW:
945                 ret = kvm_s390_set_tod_low(kvm, attr);
946                 break;
947         default:
948                 ret = -ENXIO;
949                 break;
950         }
951         return ret;
952 }
953
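/*
 * Compute the guest view of the extended TOD clock: add the guest epoch
 * (and epoch index) to the host clock and carry into the epoch index if
 * the addition wraps. Preemption is disabled so the epoch cannot change
 * underneath us (see kvm_clock_sync()).
 */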
954 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
955                                         struct kvm_s390_vm_tod_clock *gtod)
956 {
957         struct kvm_s390_tod_clock_ext htod;
958
959         preempt_disable();
960
961         get_tod_clock_ext((char *)&htod);
962
963         gtod->tod = htod.tod + kvm->arch.epoch;
964         gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
965
966         if (gtod->tod < htod.tod)
967                 gtod->epoch_idx += 1;
968
969         preempt_enable();
970 }
971
972 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
973 {
974         struct kvm_s390_vm_tod_clock gtod;
975
976         memset(&gtod, 0, sizeof(gtod));
977
978         if (test_kvm_facility(kvm, 139))
979                 kvm_s390_get_tod_clock_ext(kvm, &gtod);
980         else
981                 gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
982
983         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
984                 return -EFAULT;
985
986         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
987                 gtod.epoch_idx, gtod.tod);
988         return 0;
989 }
990
991 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
992 {
993         u8 gtod_high = 0;
994
995         if (copy_to_user((void __user *)attr->addr, &gtod_high,
996                                          sizeof(gtod_high)))
997                 return -EFAULT;
998         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
999
1000         return 0;
1001 }
1002
1003 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1004 {
1005         u64 gtod;
1006
1007         gtod = kvm_s390_get_tod_clock_fast(kvm);
1008         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1009                 return -EFAULT;
1010         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1011
1012         return 0;
1013 }
1014
1015 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1016 {
1017         int ret;
1018
1019         if (attr->flags)
1020                 return -EINVAL;
1021
1022         switch (attr->attr) {
1023         case KVM_S390_VM_TOD_EXT:
1024                 ret = kvm_s390_get_tod_ext(kvm, attr);
1025                 break;
1026         case KVM_S390_VM_TOD_HIGH:
1027                 ret = kvm_s390_get_tod_high(kvm, attr);
1028                 break;
1029         case KVM_S390_VM_TOD_LOW:
1030                 ret = kvm_s390_get_tod_low(kvm, attr);
1031                 break;
1032         default:
1033                 ret = -ENXIO;
1034                 break;
1035         }
1036         return ret;
1037 }
1038
1039 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1040 {
1041         struct kvm_s390_vm_cpu_processor *proc;
1042         u16 lowest_ibc, unblocked_ibc;
1043         int ret = 0;
1044
1045         mutex_lock(&kvm->lock);
1046         if (kvm->created_vcpus) {
1047                 ret = -EBUSY;
1048                 goto out;
1049         }
1050         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1051         if (!proc) {
1052                 ret = -ENOMEM;
1053                 goto out;
1054         }
1055         if (!copy_from_user(proc, (void __user *)attr->addr,
1056                             sizeof(*proc))) {
1057                 kvm->arch.model.cpuid = proc->cpuid;
1058                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1059                 unblocked_ibc = sclp.ibc & 0xfff;
1060                 if (lowest_ibc && proc->ibc) {
1061                         if (proc->ibc > unblocked_ibc)
1062                                 kvm->arch.model.ibc = unblocked_ibc;
1063                         else if (proc->ibc < lowest_ibc)
1064                                 kvm->arch.model.ibc = lowest_ibc;
1065                         else
1066                                 kvm->arch.model.ibc = proc->ibc;
1067                 }
1068                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1069                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1070                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1071                          kvm->arch.model.ibc,
1072                          kvm->arch.model.cpuid);
1073                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1074                          kvm->arch.model.fac_list[0],
1075                          kvm->arch.model.fac_list[1],
1076                          kvm->arch.model.fac_list[2]);
1077         } else
1078                 ret = -EFAULT;
1079         kfree(proc);
1080 out:
1081         mutex_unlock(&kvm->lock);
1082         return ret;
1083 }
1084
1085 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1086                                        struct kvm_device_attr *attr)
1087 {
1088         struct kvm_s390_vm_cpu_feat data;
1089         int ret = -EBUSY;
1090
1091         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1092                 return -EFAULT;
1093         if (!bitmap_subset((unsigned long *) data.feat,
1094                            kvm_s390_available_cpu_feat,
1095                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1096                 return -EINVAL;
1097
1098         mutex_lock(&kvm->lock);
1099         if (!atomic_read(&kvm->online_vcpus)) {
1100                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1101                             KVM_S390_VM_CPU_FEAT_NR_BITS);
1102                 ret = 0;
1103         }
1104         mutex_unlock(&kvm->lock);
1105         return ret;
1106 }
1107
1108 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1109                                           struct kvm_device_attr *attr)
1110 {
1111         /*
1112          * Once supported by kernel + hw, we have to store the subfunctions
1113          * in kvm->arch and remember that user space configured them.
1114          */
1115         return -ENXIO;
1116 }
1117
1118 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1119 {
1120         int ret = -ENXIO;
1121
1122         switch (attr->attr) {
1123         case KVM_S390_VM_CPU_PROCESSOR:
1124                 ret = kvm_s390_set_processor(kvm, attr);
1125                 break;
1126         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1127                 ret = kvm_s390_set_processor_feat(kvm, attr);
1128                 break;
1129         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1130                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1131                 break;
1132         }
1133         return ret;
1134 }
1135
1136 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1137 {
1138         struct kvm_s390_vm_cpu_processor *proc;
1139         int ret = 0;
1140
1141         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1142         if (!proc) {
1143                 ret = -ENOMEM;
1144                 goto out;
1145         }
1146         proc->cpuid = kvm->arch.model.cpuid;
1147         proc->ibc = kvm->arch.model.ibc;
1148         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1149                S390_ARCH_FAC_LIST_SIZE_BYTE);
1150         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1151                  kvm->arch.model.ibc,
1152                  kvm->arch.model.cpuid);
1153         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1154                  kvm->arch.model.fac_list[0],
1155                  kvm->arch.model.fac_list[1],
1156                  kvm->arch.model.fac_list[2]);
1157         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1158                 ret = -EFAULT;
1159         kfree(proc);
1160 out:
1161         return ret;
1162 }
1163
1164 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1165 {
1166         struct kvm_s390_vm_cpu_machine *mach;
1167         int ret = 0;
1168
1169         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1170         if (!mach) {
1171                 ret = -ENOMEM;
1172                 goto out;
1173         }
1174         get_cpu_id((struct cpuid *) &mach->cpuid);
1175         mach->ibc = sclp.ibc;
1176         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1177                S390_ARCH_FAC_LIST_SIZE_BYTE);
1178         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1179                sizeof(S390_lowcore.stfle_fac_list));
1180         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1181                  kvm->arch.model.ibc,
1182                  kvm->arch.model.cpuid);
1183         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1184                  mach->fac_mask[0],
1185                  mach->fac_mask[1],
1186                  mach->fac_mask[2]);
1187         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1188                  mach->fac_list[0],
1189                  mach->fac_list[1],
1190                  mach->fac_list[2]);
1191         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1192                 ret = -EFAULT;
1193         kfree(mach);
1194 out:
1195         return ret;
1196 }
1197
1198 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1199                                        struct kvm_device_attr *attr)
1200 {
1201         struct kvm_s390_vm_cpu_feat data;
1202
1203         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1204                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1205         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1206                 return -EFAULT;
1207         return 0;
1208 }
1209
1210 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1211                                      struct kvm_device_attr *attr)
1212 {
1213         struct kvm_s390_vm_cpu_feat data;
1214
1215         bitmap_copy((unsigned long *) data.feat,
1216                     kvm_s390_available_cpu_feat,
1217                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1218         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1219                 return -EFAULT;
1220         return 0;
1221 }
1222
1223 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1224                                           struct kvm_device_attr *attr)
1225 {
1226         /*
1227          * Once we can actually configure subfunctions (kernel + hw support),
1228          * we have to check if they were already set by user space, if so copy
1229          * them from kvm->arch.
1230          */
1231         return -ENXIO;
1232 }
1233
1234 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1235                                         struct kvm_device_attr *attr)
1236 {
1237         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1238             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1239                 return -EFAULT;
1240         return 0;
1241 }
1242 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1243 {
1244         int ret = -ENXIO;
1245
1246         switch (attr->attr) {
1247         case KVM_S390_VM_CPU_PROCESSOR:
1248                 ret = kvm_s390_get_processor(kvm, attr);
1249                 break;
1250         case KVM_S390_VM_CPU_MACHINE:
1251                 ret = kvm_s390_get_machine(kvm, attr);
1252                 break;
1253         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1254                 ret = kvm_s390_get_processor_feat(kvm, attr);
1255                 break;
1256         case KVM_S390_VM_CPU_MACHINE_FEAT:
1257                 ret = kvm_s390_get_machine_feat(kvm, attr);
1258                 break;
1259         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1260                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1261                 break;
1262         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1263                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1264                 break;
1265         }
1266         return ret;
1267 }
1268
1269 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1270 {
1271         int ret;
1272
1273         switch (attr->group) {
1274         case KVM_S390_VM_MEM_CTRL:
1275                 ret = kvm_s390_set_mem_control(kvm, attr);
1276                 break;
1277         case KVM_S390_VM_TOD:
1278                 ret = kvm_s390_set_tod(kvm, attr);
1279                 break;
1280         case KVM_S390_VM_CPU_MODEL:
1281                 ret = kvm_s390_set_cpu_model(kvm, attr);
1282                 break;
1283         case KVM_S390_VM_CRYPTO:
1284                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1285                 break;
1286         case KVM_S390_VM_MIGRATION:
1287                 ret = kvm_s390_vm_set_migration(kvm, attr);
1288                 break;
1289         default:
1290                 ret = -ENXIO;
1291                 break;
1292         }
1293
1294         return ret;
1295 }
1296
1297 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1298 {
1299         int ret;
1300
1301         switch (attr->group) {
1302         case KVM_S390_VM_MEM_CTRL:
1303                 ret = kvm_s390_get_mem_control(kvm, attr);
1304                 break;
1305         case KVM_S390_VM_TOD:
1306                 ret = kvm_s390_get_tod(kvm, attr);
1307                 break;
1308         case KVM_S390_VM_CPU_MODEL:
1309                 ret = kvm_s390_get_cpu_model(kvm, attr);
1310                 break;
1311         case KVM_S390_VM_MIGRATION:
1312                 ret = kvm_s390_vm_get_migration(kvm, attr);
1313                 break;
1314         default:
1315                 ret = -ENXIO;
1316                 break;
1317         }
1318
1319         return ret;
1320 }
1321
1322 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1323 {
1324         int ret;
1325
1326         switch (attr->group) {
1327         case KVM_S390_VM_MEM_CTRL:
1328                 switch (attr->attr) {
1329                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1330                 case KVM_S390_VM_MEM_CLR_CMMA:
1331                         ret = sclp.has_cmma ? 0 : -ENXIO;
1332                         break;
1333                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1334                         ret = 0;
1335                         break;
1336                 default:
1337                         ret = -ENXIO;
1338                         break;
1339                 }
1340                 break;
1341         case KVM_S390_VM_TOD:
1342                 switch (attr->attr) {
1343                 case KVM_S390_VM_TOD_LOW:
1344                 case KVM_S390_VM_TOD_HIGH:
1345                         ret = 0;
1346                         break;
1347                 default:
1348                         ret = -ENXIO;
1349                         break;
1350                 }
1351                 break;
1352         case KVM_S390_VM_CPU_MODEL:
1353                 switch (attr->attr) {
1354                 case KVM_S390_VM_CPU_PROCESSOR:
1355                 case KVM_S390_VM_CPU_MACHINE:
1356                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1357                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1358                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1359                         ret = 0;
1360                         break;
1361                 /* configuring subfunctions is not supported yet */
1362                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1363                 default:
1364                         ret = -ENXIO;
1365                         break;
1366                 }
1367                 break;
1368         case KVM_S390_VM_CRYPTO:
1369                 switch (attr->attr) {
1370                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1371                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1372                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1373                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1374                         ret = 0;
1375                         break;
1376                 default:
1377                         ret = -ENXIO;
1378                         break;
1379                 }
1380                 break;
1381         case KVM_S390_VM_MIGRATION:
1382                 ret = 0;
1383                 break;
1384         default:
1385                 ret = -ENXIO;
1386                 break;
1387         }
1388
1389         return ret;
1390 }
1391
1392 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1393 {
1394         uint8_t *keys;
1395         uint64_t hva;
1396         int srcu_idx, i, r = 0;
1397
1398         if (args->flags != 0)
1399                 return -EINVAL;
1400
1401         /* Is this guest using storage keys? */
1402         if (!mm_use_skey(current->mm))
1403                 return KVM_S390_GET_SKEYS_NONE;
1404
1405         /* Enforce sane limit on memory allocation */
1406         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1407                 return -EINVAL;
1408
1409         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1410         if (!keys)
1411                 return -ENOMEM;
1412
1413         down_read(&current->mm->mmap_sem);
1414         srcu_idx = srcu_read_lock(&kvm->srcu);
1415         for (i = 0; i < args->count; i++) {
1416                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1417                 if (kvm_is_error_hva(hva)) {
1418                         r = -EFAULT;
1419                         break;
1420                 }
1421
1422                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1423                 if (r)
1424                         break;
1425         }
1426         srcu_read_unlock(&kvm->srcu, srcu_idx);
1427         up_read(&current->mm->mmap_sem);
1428
1429         if (!r) {
1430                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1431                                  sizeof(uint8_t) * args->count);
1432                 if (r)
1433                         r = -EFAULT;
1434         }
1435
1436         kvfree(keys);
1437         return r;
1438 }
1439
1440 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1441 {
1442         uint8_t *keys;
1443         uint64_t hva;
1444         int srcu_idx, i, r = 0;
1445
1446         if (args->flags != 0)
1447                 return -EINVAL;
1448
1449         /* Enforce sane limit on memory allocation */
1450         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1451                 return -EINVAL;
1452
1453         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1454         if (!keys)
1455                 return -ENOMEM;
1456
1457         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1458                            sizeof(uint8_t) * args->count);
1459         if (r) {
1460                 r = -EFAULT;
1461                 goto out;
1462         }
1463
1464         /* Enable storage key handling for the guest */
1465         r = s390_enable_skey();
1466         if (r)
1467                 goto out;
1468
1469         down_read(&current->mm->mmap_sem);
1470         srcu_idx = srcu_read_lock(&kvm->srcu);
1471         for (i = 0; i < args->count; i++) {
1472                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1473                 if (kvm_is_error_hva(hva)) {
1474                         r = -EFAULT;
1475                         break;
1476                 }
1477
1478                 /* Lowest order bit is reserved */
1479                 if (keys[i] & 0x01) {
1480                         r = -EINVAL;
1481                         break;
1482                 }
1483
1484                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1485                 if (r)
1486                         break;
1487         }
1488         srcu_read_unlock(&kvm->srcu, srcu_idx);
1489         up_read(&current->mm->mmap_sem);
1490 out:
1491         kvfree(keys);
1492         return r;
1493 }
1494
1495 /*
1496  * Base address and length must be sent at the start of each block; it is
1497  * therefore cheaper to send some clean data, as long as it is less than the
1498  * size of two longs.
1499  */
1500 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1501 /* for consistency */
1502 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1503
1504 /*
1505  * This function searches for the next page with dirty CMMA attributes, and
1506  * saves the attributes in the buffer up to either the end of the buffer or
1507  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1508  * no trailing clean bytes are saved.
1509  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1510  * output buffer will indicate 0 as length.
1511  */
1512 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1513                                   struct kvm_s390_cmma_log *args)
1514 {
1515         struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1516         unsigned long bufsize, hva, pgstev, i, next, cur;
1517         int srcu_idx, peek, r = 0, rr;
1518         u8 *res;
1519
1520         cur = args->start_gfn;
1521         i = next = pgstev = 0;
1522
1523         if (unlikely(!kvm->arch.use_cmma))
1524                 return -ENXIO;
1525         /* Invalid/unsupported flags were specified */
1526         if (args->flags & ~KVM_S390_CMMA_PEEK)
1527                 return -EINVAL;
1528         /* Migration mode query, and we are not doing a migration */
1529         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1530         if (!peek && !s)
1531                 return -EINVAL;
1532         /* CMMA is disabled or was not used, or the buffer has length zero */
1533         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1534         if (!bufsize || !kvm->mm->context.use_cmma) {
1535                 memset(args, 0, sizeof(*args));
1536                 return 0;
1537         }
1538
1539         if (!peek) {
1540                 /* We are not peeking, and there are no dirty pages */
1541                 if (!atomic64_read(&s->dirty_pages)) {
1542                         memset(args, 0, sizeof(*args));
1543                         return 0;
1544                 }
1545                 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1546                                     args->start_gfn);
1547                 if (cur >= s->bitmap_size)      /* nothing found, loop back */
1548                         cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1549                 if (cur >= s->bitmap_size) {    /* again! (very unlikely) */
1550                         memset(args, 0, sizeof(*args));
1551                         return 0;
1552                 }
1553                 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1554         }
1555
1556         res = vmalloc(bufsize);
1557         if (!res)
1558                 return -ENOMEM;
1559
1560         args->start_gfn = cur;
1561
1562         down_read(&kvm->mm->mmap_sem);
1563         srcu_idx = srcu_read_lock(&kvm->srcu);
1564         while (i < bufsize) {
1565                 hva = gfn_to_hva(kvm, cur);
1566                 if (kvm_is_error_hva(hva)) {
1567                         r = -EFAULT;
1568                         break;
1569                 }
1570                 /* decrement only if we actually flipped the bit to 0 */
1571                 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1572                         atomic64_dec(&s->dirty_pages);
1573                 r = get_pgste(kvm->mm, hva, &pgstev);
1574                 if (r < 0)
1575                         pgstev = 0;
1576                 /* save the value */
1577                 res[i++] = (pgstev >> 24) & 0x43;
1578                 /*
1579                  * If the next bit is too far away, stop.
1580                  * If we reached the previous "next", find the next one.
1581                  */
1582                 if (!peek) {
1583                         if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1584                                 break;
1585                         if (cur == next)
1586                                 next = find_next_bit(s->pgste_bitmap,
1587                                                      s->bitmap_size, cur + 1);
1588                         /* reached the end of the bitmap or of the buffer, stop */
1589                         if ((next >= s->bitmap_size) ||
1590                             (next >= args->start_gfn + bufsize))
1591                                 break;
1592                 }
1593                 cur++;
1594         }
1595         srcu_read_unlock(&kvm->srcu, srcu_idx);
1596         up_read(&kvm->mm->mmap_sem);
1597         args->count = i;
1598         args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1599
1600         rr = copy_to_user((void __user *)args->values, res, args->count);
1601         if (rr)
1602                 r = -EFAULT;
1603
1604         vfree(res);
1605         return r;
1606 }
1607
1608 /*
1609  * This function sets the CMMA attributes for the given pages. If the input
1610  * buffer has zero length, no action is taken, otherwise the attributes are
1611  * set and the mm->context.use_cmma flag is set.
1612  */
1613 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1614                                   const struct kvm_s390_cmma_log *args)
1615 {
1616         unsigned long hva, mask, pgstev, i;
1617         uint8_t *bits;
1618         int srcu_idx, r = 0;
1619
1620         mask = args->mask;
1621
1622         if (!kvm->arch.use_cmma)
1623                 return -ENXIO;
1624         /* invalid/unsupported flags */
1625         if (args->flags != 0)
1626                 return -EINVAL;
1627         /* Enforce sane limit on memory allocation */
1628         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1629                 return -EINVAL;
1630         /* Nothing to do */
1631         if (args->count == 0)
1632                 return 0;
1633
1634         bits = vmalloc(sizeof(*bits) * args->count);
1635         if (!bits)
1636                 return -ENOMEM;
1637
1638         r = copy_from_user(bits, (void __user *)args->values, args->count);
1639         if (r) {
1640                 r = -EFAULT;
1641                 goto out;
1642         }
1643
1644         down_read(&kvm->mm->mmap_sem);
1645         srcu_idx = srcu_read_lock(&kvm->srcu);
1646         for (i = 0; i < args->count; i++) {
1647                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1648                 if (kvm_is_error_hva(hva)) {
1649                         r = -EFAULT;
1650                         break;
1651                 }
1652
1653                 pgstev = bits[i];
1654                 pgstev = pgstev << 24;
1655                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1656                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1657         }
1658         srcu_read_unlock(&kvm->srcu, srcu_idx);
1659         up_read(&kvm->mm->mmap_sem);
1660
1661         if (!kvm->mm->context.use_cmma) {
1662                 down_write(&kvm->mm->mmap_sem);
1663                 kvm->mm->context.use_cmma = 1;
1664                 up_write(&kvm->mm->mmap_sem);
1665         }
1666 out:
1667         vfree(bits);
1668         return r;
1669 }
1670
1671 long kvm_arch_vm_ioctl(struct file *filp,
1672                        unsigned int ioctl, unsigned long arg)
1673 {
1674         struct kvm *kvm = filp->private_data;
1675         void __user *argp = (void __user *)arg;
1676         struct kvm_device_attr attr;
1677         int r;
1678
1679         switch (ioctl) {
1680         case KVM_S390_INTERRUPT: {
1681                 struct kvm_s390_interrupt s390int;
1682
1683                 r = -EFAULT;
1684                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1685                         break;
1686                 r = kvm_s390_inject_vm(kvm, &s390int);
1687                 break;
1688         }
1689         case KVM_ENABLE_CAP: {
1690                 struct kvm_enable_cap cap;
1691                 r = -EFAULT;
1692                 if (copy_from_user(&cap, argp, sizeof(cap)))
1693                         break;
1694                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1695                 break;
1696         }
1697         case KVM_CREATE_IRQCHIP: {
1698                 struct kvm_irq_routing_entry routing;
1699
1700                 r = -EINVAL;
1701                 if (kvm->arch.use_irqchip) {
1702                         /* Set up dummy routing. */
1703                         memset(&routing, 0, sizeof(routing));
1704                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1705                 }
1706                 break;
1707         }
1708         case KVM_SET_DEVICE_ATTR: {
1709                 r = -EFAULT;
1710                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1711                         break;
1712                 r = kvm_s390_vm_set_attr(kvm, &attr);
1713                 break;
1714         }
1715         case KVM_GET_DEVICE_ATTR: {
1716                 r = -EFAULT;
1717                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1718                         break;
1719                 r = kvm_s390_vm_get_attr(kvm, &attr);
1720                 break;
1721         }
1722         case KVM_HAS_DEVICE_ATTR: {
1723                 r = -EFAULT;
1724                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1725                         break;
1726                 r = kvm_s390_vm_has_attr(kvm, &attr);
1727                 break;
1728         }
1729         case KVM_S390_GET_SKEYS: {
1730                 struct kvm_s390_skeys args;
1731
1732                 r = -EFAULT;
1733                 if (copy_from_user(&args, argp,
1734                                    sizeof(struct kvm_s390_skeys)))
1735                         break;
1736                 r = kvm_s390_get_skeys(kvm, &args);
1737                 break;
1738         }
1739         case KVM_S390_SET_SKEYS: {
1740                 struct kvm_s390_skeys args;
1741
1742                 r = -EFAULT;
1743                 if (copy_from_user(&args, argp,
1744                                    sizeof(struct kvm_s390_skeys)))
1745                         break;
1746                 r = kvm_s390_set_skeys(kvm, &args);
1747                 break;
1748         }
1749         case KVM_S390_GET_CMMA_BITS: {
1750                 struct kvm_s390_cmma_log args;
1751
1752                 r = -EFAULT;
1753                 if (copy_from_user(&args, argp, sizeof(args)))
1754                         break;
1755                 r = kvm_s390_get_cmma_bits(kvm, &args);
1756                 if (!r) {
1757                         r = copy_to_user(argp, &args, sizeof(args));
1758                         if (r)
1759                                 r = -EFAULT;
1760                 }
1761                 break;
1762         }
1763         case KVM_S390_SET_CMMA_BITS: {
1764                 struct kvm_s390_cmma_log args;
1765
1766                 r = -EFAULT;
1767                 if (copy_from_user(&args, argp, sizeof(args)))
1768                         break;
1769                 r = kvm_s390_set_cmma_bits(kvm, &args);
1770                 break;
1771         }
1772         default:
1773                 r = -ENOTTY;
1774         }
1775
1776         return r;
1777 }
1778
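/*
 * Query the AP (crypto adapter) configuration via PQAP(QCI), encoded here as
 * ".long 0xb2af0000". GPR0 holds the function code and GPR2 the buffer
 * address; the 128-byte result is stored in *config and the instruction's
 * condition code is returned.
 */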
1779 static int kvm_s390_query_ap_config(u8 *config)
1780 {
1781         u32 fcn_code = 0x04000000UL;
1782         u32 cc = 0;
1783
1784         memset(config, 0, 128);
1785         asm volatile(
1786                 "lgr 0,%1\n"
1787                 "lgr 2,%2\n"
1788                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1789                 "0: ipm %0\n"
1790                 "srl %0,28\n"
1791                 "1:\n"
1792                 EX_TABLE(0b, 1b)
1793                 : "+r" (cc)
1794                 : "r" (fcn_code), "r" (config)
1795                 : "cc", "0", "2", "memory"
1796         );
1797
1798         return cc;
1799 }
1800
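/*
 * Report whether APXA (extended AP addressing) is available: the QCI query is
 * only attempted when facility 12 is installed, and bit 0x40 of the first
 * result byte indicates APXA support.
 */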
1801 static int kvm_s390_apxa_installed(void)
1802 {
1803         u8 config[128];
1804         int cc;
1805
1806         if (test_facility(12)) {
1807                 cc = kvm_s390_query_ap_config(config);
1808
1809                 if (cc)
1810                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1811                 else
1812                         return config[0] & 0x40;
1813         }
1814
1815         return 0;
1816 }
1817
1818 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1819 {
1820         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1821
1822         if (kvm_s390_apxa_installed())
1823                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1824         else
1825                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1826 }
1827
1828 static u64 kvm_s390_get_initial_cpuid(void)
1829 {
1830         struct cpuid cpuid;
1831
1832         get_cpu_id(&cpuid);
1833         cpuid.version = 0xff;
1834         return *((u64 *) &cpuid);
1835 }
1836
1837 static void kvm_s390_crypto_init(struct kvm *kvm)
1838 {
1839         if (!test_kvm_facility(kvm, 76))
1840                 return;
1841
1842         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1843         kvm_s390_set_crycb_format(kvm);
1844
1845         /* Enable AES/DEA protected key functions by default */
1846         kvm->arch.crypto.aes_kw = 1;
1847         kvm->arch.crypto.dea_kw = 1;
1848         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1849                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1850         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1851                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1852 }
1853
1854 static void sca_dispose(struct kvm *kvm)
1855 {
1856         if (kvm->arch.use_esca)
1857                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1858         else
1859                 free_page((unsigned long)(kvm->arch.sca));
1860         kvm->arch.sca = NULL;
1861 }
1862
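/*
 * Create a new s390 KVM guest: allocate the SCA and the second SIE page, set
 * up the debug feature, derive the guest facility mask/list and CPU model,
 * initialize crypto and floating interrupt state, and create the guest
 * address space (gmap) unless this is a ucontrol VM.
 */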
1863 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1864 {
1865         gfp_t alloc_flags = GFP_KERNEL;
1866         int i, rc;
1867         char debug_name[16];
1868         static unsigned long sca_offset;
1869
1870         rc = -EINVAL;
1871 #ifdef CONFIG_KVM_S390_UCONTROL
1872         if (type & ~KVM_VM_S390_UCONTROL)
1873                 goto out_err;
1874         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1875                 goto out_err;
1876 #else
1877         if (type)
1878                 goto out_err;
1879 #endif
1880
1881         rc = s390_enable_sie();
1882         if (rc)
1883                 goto out_err;
1884
1885         rc = -ENOMEM;
1886
1887         ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1888
1889         kvm->arch.use_esca = 0; /* start with basic SCA */
1890         if (!sclp.has_64bscao)
1891                 alloc_flags |= GFP_DMA;
1892         rwlock_init(&kvm->arch.sca_lock);
1893         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1894         if (!kvm->arch.sca)
1895                 goto out_err;
1896         spin_lock(&kvm_lock);
1897         sca_offset += 16;
1898         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1899                 sca_offset = 0;
1900         kvm->arch.sca = (struct bsca_block *)
1901                         ((char *) kvm->arch.sca + sca_offset);
1902         spin_unlock(&kvm_lock);
1903
1904         sprintf(debug_name, "kvm-%u", current->pid);
1905
1906         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1907         if (!kvm->arch.dbf)
1908                 goto out_err;
1909
1910         kvm->arch.sie_page2 =
1911              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1912         if (!kvm->arch.sie_page2)
1913                 goto out_err;
1914
1915         /* Populate the facility mask initially. */
1916         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1917                sizeof(S390_lowcore.stfle_fac_list));
1918         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1919                 if (i < kvm_s390_fac_list_mask_size())
1920                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1921                 else
1922                         kvm->arch.model.fac_mask[i] = 0UL;
1923         }
1924
1925         /* Populate the facility list initially. */
1926         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1927         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1928                S390_ARCH_FAC_LIST_SIZE_BYTE);
1929
1930         /* we are always in czam mode - even on pre z14 machines */
1931         set_kvm_facility(kvm->arch.model.fac_mask, 138);
1932         set_kvm_facility(kvm->arch.model.fac_list, 138);
1933         /* we emulate STHYI in kvm */
1934         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1935         set_kvm_facility(kvm->arch.model.fac_list, 74);
1936         if (MACHINE_HAS_TLB_GUEST) {
1937                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
1938                 set_kvm_facility(kvm->arch.model.fac_list, 147);
1939         }
1940
1941         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1942         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1943
1944         kvm_s390_crypto_init(kvm);
1945
1946         mutex_init(&kvm->arch.float_int.ais_lock);
1947         kvm->arch.float_int.simm = 0;
1948         kvm->arch.float_int.nimm = 0;
1949         spin_lock_init(&kvm->arch.float_int.lock);
1950         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1951                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1952         init_waitqueue_head(&kvm->arch.ipte_wq);
1953         mutex_init(&kvm->arch.ipte_mutex);
1954
1955         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1956         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1957
1958         if (type & KVM_VM_S390_UCONTROL) {
1959                 kvm->arch.gmap = NULL;
1960                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1961         } else {
1962                 if (sclp.hamax == U64_MAX)
1963                         kvm->arch.mem_limit = TASK_SIZE_MAX;
1964                 else
1965                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1966                                                     sclp.hamax + 1);
1967                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1968                 if (!kvm->arch.gmap)
1969                         goto out_err;
1970                 kvm->arch.gmap->private = kvm;
1971                 kvm->arch.gmap->pfault_enabled = 0;
1972         }
1973
1974         kvm->arch.css_support = 0;
1975         kvm->arch.use_irqchip = 0;
1976         kvm->arch.epoch = 0;
1977
1978         spin_lock_init(&kvm->arch.start_stop_lock);
1979         kvm_s390_vsie_init(kvm);
1980         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1981
1982         return 0;
1983 out_err:
1984         free_page((unsigned long)kvm->arch.sie_page2);
1985         debug_unregister(kvm->arch.dbf);
1986         sca_dispose(kvm);
1987         KVM_EVENT(3, "creation of vm failed: %d", rc);
1988         return rc;
1989 }
1990
1991 bool kvm_arch_has_vcpu_debugfs(void)
1992 {
1993         return false;
1994 }
1995
1996 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1997 {
1998         return 0;
1999 }
2000
2001 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2002 {
2003         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2004         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2005         kvm_s390_clear_local_irqs(vcpu);
2006         kvm_clear_async_pf_completion_queue(vcpu);
2007         if (!kvm_is_ucontrol(vcpu->kvm))
2008                 sca_del_vcpu(vcpu);
2009
2010         if (kvm_is_ucontrol(vcpu->kvm))
2011                 gmap_remove(vcpu->arch.gmap);
2012
2013         if (vcpu->kvm->arch.use_cmma)
2014                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2015         free_page((unsigned long)(vcpu->arch.sie_block));
2016
2017         kvm_vcpu_uninit(vcpu);
2018         kmem_cache_free(kvm_vcpu_cache, vcpu);
2019 }
2020
2021 static void kvm_free_vcpus(struct kvm *kvm)
2022 {
2023         unsigned int i;
2024         struct kvm_vcpu *vcpu;
2025
2026         kvm_for_each_vcpu(i, vcpu, kvm)
2027                 kvm_arch_vcpu_destroy(vcpu);
2028
2029         mutex_lock(&kvm->lock);
2030         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2031                 kvm->vcpus[i] = NULL;
2032
2033         atomic_set(&kvm->online_vcpus, 0);
2034         mutex_unlock(&kvm->lock);
2035 }
2036
2037 void kvm_arch_destroy_vm(struct kvm *kvm)
2038 {
2039         kvm_free_vcpus(kvm);
2040         sca_dispose(kvm);
2041         debug_unregister(kvm->arch.dbf);
2042         free_page((unsigned long)kvm->arch.sie_page2);
2043         if (!kvm_is_ucontrol(kvm))
2044                 gmap_remove(kvm->arch.gmap);
2045         kvm_s390_destroy_adapters(kvm);
2046         kvm_s390_clear_float_irqs(kvm);
2047         kvm_s390_vsie_destroy(kvm);
2048         if (kvm->arch.migration_state) {
2049                 vfree(kvm->arch.migration_state->pgste_bitmap);
2050                 kfree(kvm->arch.migration_state);
2051         }
2052         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2053 }
2054
2055 /* Section: vcpu related */
2056 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2057 {
2058         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2059         if (!vcpu->arch.gmap)
2060                 return -ENOMEM;
2061         vcpu->arch.gmap->private = vcpu->kvm;
2062
2063         return 0;
2064 }
2065
2066 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2067 {
2068         if (!kvm_s390_use_sca_entries())
2069                 return;
2070         read_lock(&vcpu->kvm->arch.sca_lock);
2071         if (vcpu->kvm->arch.use_esca) {
2072                 struct esca_block *sca = vcpu->kvm->arch.sca;
2073
2074                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2075                 sca->cpu[vcpu->vcpu_id].sda = 0;
2076         } else {
2077                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2078
2079                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2080                 sca->cpu[vcpu->vcpu_id].sda = 0;
2081         }
2082         read_unlock(&vcpu->kvm->arch.sca_lock);
2083 }
2084
2085 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2086 {
2087         if (!kvm_s390_use_sca_entries()) {
2088                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2089
2090                 /* we still need the basic sca for the ipte control */
2091                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2092                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2093         }
2094         read_lock(&vcpu->kvm->arch.sca_lock);
2095         if (vcpu->kvm->arch.use_esca) {
2096                 struct esca_block *sca = vcpu->kvm->arch.sca;
2097
2098                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2099                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2100                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2101                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2102                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2103         } else {
2104                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2105
2106                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2107                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2108                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2109                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2110         }
2111         read_unlock(&vcpu->kvm->arch.sca_lock);
2112 }
2113
2114 /* Basic SCA to Extended SCA data copy routines */
2115 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2116 {
2117         d->sda = s->sda;
2118         d->sigp_ctrl.c = s->sigp_ctrl.c;
2119         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2120 }
2121
2122 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2123 {
2124         int i;
2125
2126         d->ipte_control = s->ipte_control;
2127         d->mcn[0] = s->mcn;
2128         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2129                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2130 }
2131
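/*
 * Replace the basic SCA with an extended SCA: allocate and zero a new ESCA,
 * copy over the BSCA contents, repoint every vcpu's SIE block (scaoh/scaol
 * and ECB2_ESCA) while all vcpus are blocked, then free the old basic SCA.
 */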
2132 static int sca_switch_to_extended(struct kvm *kvm)
2133 {
2134         struct bsca_block *old_sca = kvm->arch.sca;
2135         struct esca_block *new_sca;
2136         struct kvm_vcpu *vcpu;
2137         unsigned int vcpu_idx;
2138         u32 scaol, scaoh;
2139
2140         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2141         if (!new_sca)
2142                 return -ENOMEM;
2143
2144         scaoh = (u32)((u64)(new_sca) >> 32);
2145         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2146
2147         kvm_s390_vcpu_block_all(kvm);
2148         write_lock(&kvm->arch.sca_lock);
2149
2150         sca_copy_b_to_e(new_sca, old_sca);
2151
2152         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2153                 vcpu->arch.sie_block->scaoh = scaoh;
2154                 vcpu->arch.sie_block->scaol = scaol;
2155                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2156         }
2157         kvm->arch.sca = new_sca;
2158         kvm->arch.use_esca = 1;
2159
2160         write_unlock(&kvm->arch.sca_lock);
2161         kvm_s390_vcpu_unblock_all(kvm);
2162
2163         free_page((unsigned long)old_sca);
2164
2165         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2166                  old_sca, kvm->arch.sca);
2167         return 0;
2168 }
2169
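/*
 * Check whether a vcpu with the given id fits into the current SCA. Without
 * SCA entries only the vcpu count limit applies; otherwise ids beyond the
 * basic SCA slots trigger an on-demand switch to the ESCA, provided the
 * machine supports it.
 */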
2170 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2171 {
2172         int rc;
2173
2174         if (!kvm_s390_use_sca_entries()) {
2175                 if (id < KVM_MAX_VCPUS)
2176                         return true;
2177                 return false;
2178         }
2179         if (id < KVM_S390_BSCA_CPU_SLOTS)
2180                 return true;
2181         if (!sclp.has_esca || !sclp.has_64bscao)
2182                 return false;
2183
2184         mutex_lock(&kvm->lock);
2185         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2186         mutex_unlock(&kvm->lock);
2187
2188         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2189 }
2190
2191 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2192 {
2193         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2194         kvm_clear_async_pf_completion_queue(vcpu);
2195         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2196                                     KVM_SYNC_GPRS |
2197                                     KVM_SYNC_ACRS |
2198                                     KVM_SYNC_CRS |
2199                                     KVM_SYNC_ARCH0 |
2200                                     KVM_SYNC_PFAULT;
2201         kvm_s390_set_prefix(vcpu, 0);
2202         if (test_kvm_facility(vcpu->kvm, 64))
2203                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2204         if (test_kvm_facility(vcpu->kvm, 133))
2205                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2206         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2207          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2208          */
2209         if (MACHINE_HAS_VX)
2210                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2211         else
2212                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2213
2214         if (kvm_is_ucontrol(vcpu->kvm))
2215                 return __kvm_ucontrol_vcpu_init(vcpu);
2216
2217         return 0;
2218 }
2219
2220 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2221 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2222 {
2223         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2224         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2225         vcpu->arch.cputm_start = get_tod_clock_fast();
2226         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2227 }
2228
2229 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2230 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2231 {
2232         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2233         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2234         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2235         vcpu->arch.cputm_start = 0;
2236         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2237 }
2238
2239 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2240 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2241 {
2242         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2243         vcpu->arch.cputm_enabled = true;
2244         __start_cpu_timer_accounting(vcpu);
2245 }
2246
2247 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2248 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2249 {
2250         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2251         __stop_cpu_timer_accounting(vcpu);
2252         vcpu->arch.cputm_enabled = false;
2253 }
2254
2255 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2256 {
2257         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2258         __enable_cpu_timer_accounting(vcpu);
2259         preempt_enable();
2260 }
2261
2262 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2263 {
2264         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2265         __disable_cpu_timer_accounting(vcpu);
2266         preempt_enable();
2267 }
2268
2269 /* set the cpu timer - may only be called from the VCPU thread itself */
2270 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2271 {
2272         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2273         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2274         if (vcpu->arch.cputm_enabled)
2275                 vcpu->arch.cputm_start = get_tod_clock_fast();
2276         vcpu->arch.sie_block->cputm = cputm;
2277         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2278         preempt_enable();
2279 }
2280
2281 /* update and get the cpu timer - can also be called from other VCPU threads */
2282 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2283 {
2284         unsigned int seq;
2285         __u64 value;
2286
2287         if (unlikely(!vcpu->arch.cputm_enabled))
2288                 return vcpu->arch.sie_block->cputm;
2289
2290         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2291         do {
2292                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2293                 /*
2294                  * If the writer would ever execute a read in the critical
2295                  * section, e.g. in irq context, we have a deadlock.
2296                  */
2297                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2298                 value = vcpu->arch.sie_block->cputm;
2299                 /* if cputm_start is 0, accounting is being started/stopped */
2300                 if (likely(vcpu->arch.cputm_start))
2301                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2302         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2303         preempt_enable();
2304         return value;
2305 }
2306
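/*
 * Called when the vcpu thread is scheduled in on a host cpu: re-enable the
 * previously enabled gmap, mark the vcpu as running and resume CPU timer
 * accounting if it is active.
 */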
2307 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2308 {
2309
2310         gmap_enable(vcpu->arch.enabled_gmap);
2311         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2312         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2313                 __start_cpu_timer_accounting(vcpu);
2314         vcpu->cpu = cpu;
2315 }
2316
2317 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2318 {
2319         vcpu->cpu = -1;
2320         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2321                 __stop_cpu_timer_accounting(vcpu);
2322         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2323         vcpu->arch.enabled_gmap = gmap_get_enabled();
2324         gmap_disable(vcpu->arch.enabled_gmap);
2325
2326 }
2327
2328 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2329 {
2330         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2331         vcpu->arch.sie_block->gpsw.mask = 0UL;
2332         vcpu->arch.sie_block->gpsw.addr = 0UL;
2333         kvm_s390_set_prefix(vcpu, 0);
2334         kvm_s390_set_cpu_timer(vcpu, 0);
2335         vcpu->arch.sie_block->ckc       = 0UL;
2336         vcpu->arch.sie_block->todpr     = 0;
2337         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2338         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2339         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2340         /* make sure the new fpc will be lazily loaded */
2341         save_fpu_regs();
2342         current->thread.fpu.fpc = 0;
2343         vcpu->arch.sie_block->gbea = 1;
2344         vcpu->arch.sie_block->pp = 0;
2345         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2346         kvm_clear_async_pf_completion_queue(vcpu);
2347         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2348                 kvm_s390_vcpu_stop(vcpu);
2349         kvm_s390_clear_local_irqs(vcpu);
2350 }
2351
2352 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2353 {
2354         mutex_lock(&vcpu->kvm->lock);
2355         preempt_disable();
2356         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2357         preempt_enable();
2358         mutex_unlock(&vcpu->kvm->lock);
2359         if (!kvm_is_ucontrol(vcpu->kvm)) {
2360                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2361                 sca_add_vcpu(vcpu);
2362         }
2363         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2364                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2365         /* make vcpu_load load the right gmap on the first trigger */
2366         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2367 }
2368
2369 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2370 {
2371         if (!test_kvm_facility(vcpu->kvm, 76))
2372                 return;
2373
2374         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2375
2376         if (vcpu->kvm->arch.crypto.aes_kw)
2377                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2378         if (vcpu->kvm->arch.crypto.dea_kw)
2379                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2380
2381         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2382 }
2383
2384 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2385 {
2386         free_page(vcpu->arch.sie_block->cbrlo);
2387         vcpu->arch.sie_block->cbrlo = 0;
2388 }
2389
2390 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2391 {
2392         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2393         if (!vcpu->arch.sie_block->cbrlo)
2394                 return -ENOMEM;
2395
2396         vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2397         return 0;
2398 }
2399
2400 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2401 {
2402         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2403
2404         vcpu->arch.sie_block->ibc = model->ibc;
2405         if (test_kvm_facility(vcpu->kvm, 7))
2406                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2407 }
2408
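/*
 * Configure the SIE control block of a freshly created vcpu according to the
 * guest facilities and SCLP capabilities: interpretation controls
 * (ECB/ECA/ECD bits), the CMMA collection buffer if in use, the clock
 * comparator timer and the crypto control block.
 */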
2409 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2410 {
2411         int rc = 0;
2412
2413         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2414                                                     CPUSTAT_SM |
2415                                                     CPUSTAT_STOPPED);
2416
2417         if (test_kvm_facility(vcpu->kvm, 78))
2418                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2419         else if (test_kvm_facility(vcpu->kvm, 8))
2420                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2421
2422         kvm_s390_vcpu_setup_model(vcpu);
2423
2424         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2425         if (MACHINE_HAS_ESOP)
2426                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2427         if (test_kvm_facility(vcpu->kvm, 9))
2428                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2429         if (test_kvm_facility(vcpu->kvm, 73))
2430                 vcpu->arch.sie_block->ecb |= ECB_TE;
2431
2432         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2433                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2434         if (test_kvm_facility(vcpu->kvm, 130))
2435                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2436         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2437         if (sclp.has_cei)
2438                 vcpu->arch.sie_block->eca |= ECA_CEI;
2439         if (sclp.has_ib)
2440                 vcpu->arch.sie_block->eca |= ECA_IB;
2441         if (sclp.has_siif)
2442                 vcpu->arch.sie_block->eca |= ECA_SII;
2443         if (sclp.has_sigpif)
2444                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2445         if (test_kvm_facility(vcpu->kvm, 129)) {
2446                 vcpu->arch.sie_block->eca |= ECA_VX;
2447                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2448         }
2449         if (test_kvm_facility(vcpu->kvm, 139))
2450                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2451
2452         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2453                                         | SDNXC;
2454         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2455
2456         if (sclp.has_kss)
2457                 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2458         else
2459                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2460
2461         if (vcpu->kvm->arch.use_cmma) {
2462                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2463                 if (rc)
2464                         return rc;
2465         }
2466         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2467         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2468
2469         kvm_s390_vcpu_crypto_setup(vcpu);
2470
2471         return rc;
2472 }
2473
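/*
 * Allocate and initialize a new vcpu: verify that the id fits into the SCA
 * (switching to the ESCA if necessary), allocate the vcpu structure and its
 * SIE page (control block plus ITDB), and initialize the local interrupt
 * state and CPU timer bookkeeping.
 */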
2474 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2475                                       unsigned int id)
2476 {
2477         struct kvm_vcpu *vcpu;
2478         struct sie_page *sie_page;
2479         int rc = -EINVAL;
2480
2481         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2482                 goto out;
2483
2484         rc = -ENOMEM;
2485
2486         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2487         if (!vcpu)
2488                 goto out;
2489
2490         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2491         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2492         if (!sie_page)
2493                 goto out_free_cpu;
2494
2495         vcpu->arch.sie_block = &sie_page->sie_block;
2496         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2497
2498         /* the real guest size will always be smaller than msl */
2499         vcpu->arch.sie_block->mso = 0;
2500         vcpu->arch.sie_block->msl = sclp.hamax;
2501
2502         vcpu->arch.sie_block->icpua = id;
2503         spin_lock_init(&vcpu->arch.local_int.lock);
2504         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2505         vcpu->arch.local_int.wq = &vcpu->wq;
2506         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2507         seqcount_init(&vcpu->arch.cputm_seqcount);
2508
2509         rc = kvm_vcpu_init(vcpu, kvm, id);
2510         if (rc)
2511                 goto out_free_sie_block;
2512         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2513                  vcpu->arch.sie_block);
2514         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2515
2516         return vcpu;
2517 out_free_sie_block:
2518         free_page((unsigned long)(vcpu->arch.sie_block));
2519 out_free_cpu:
2520         kmem_cache_free(kvm_vcpu_cache, vcpu);
2521 out:
2522         return ERR_PTR(rc);
2523 }
2524
2525 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2526 {
2527         return kvm_s390_vcpu_has_irq(vcpu, 0);
2528 }
2529
2530 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2531 {
2532         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2533         exit_sie(vcpu);
2534 }
2535
2536 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2537 {
2538         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2539 }
2540
2541 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2542 {
2543         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2544         exit_sie(vcpu);
2545 }
2546
2547 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2548 {
2549         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2550 }
2551
2552 /*
2553  * Kick a guest cpu out of SIE and wait until SIE is not running. If the
2554  * CPU is not running (e.g. waiting as idle) the function returns immediately.
2555  */
2556 void exit_sie(struct kvm_vcpu *vcpu)
2557 {
2558         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2559         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2560                 cpu_relax();
2561 }
2562
2563 /* Kick a guest cpu out of SIE to process a request synchronously */
2564 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2565 {
2566         kvm_make_request(req, vcpu);
2567         kvm_s390_vcpu_request(vcpu);
2568 }
2569
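/*
 * gmap invalidation notifier: when host mappings below 2 GB are invalidated,
 * any vcpu whose (two-page) prefix area intersects the range gets a
 * KVM_REQ_MMU_RELOAD request, so the prefix pages are faulted in again and
 * the ipte notifier is re-armed before the next SIE entry. Shadow gmaps are
 * ignored here.
 */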
2570 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2571                               unsigned long end)
2572 {
2573         struct kvm *kvm = gmap->private;
2574         struct kvm_vcpu *vcpu;
2575         unsigned long prefix;
2576         int i;
2577
2578         if (gmap_is_shadow(gmap))
2579                 return;
2580         if (start >= 1UL << 31)
2581                 /* We are only interested in prefix pages */
2582                 return;
2583         kvm_for_each_vcpu(i, vcpu, kvm) {
2584                 /* match against both prefix pages */
2585                 prefix = kvm_s390_get_prefix(vcpu);
2586                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2587                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2588                                    start, end);
2589                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2590                 }
2591         }
2592 }
2593
2594 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2595 {
2596         /* kvm common code refers to this, but never calls it */
2597         BUG();
2598         return 0;
2599 }
2600
2601 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2602                                            struct kvm_one_reg *reg)
2603 {
2604         int r = -EINVAL;
2605
2606         switch (reg->id) {
2607         case KVM_REG_S390_TODPR:
2608                 r = put_user(vcpu->arch.sie_block->todpr,
2609                              (u32 __user *)reg->addr);
2610                 break;
2611         case KVM_REG_S390_EPOCHDIFF:
2612                 r = put_user(vcpu->arch.sie_block->epoch,
2613                              (u64 __user *)reg->addr);
2614                 break;
2615         case KVM_REG_S390_CPU_TIMER:
2616                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2617                              (u64 __user *)reg->addr);
2618                 break;
2619         case KVM_REG_S390_CLOCK_COMP:
2620                 r = put_user(vcpu->arch.sie_block->ckc,
2621                              (u64 __user *)reg->addr);
2622                 break;
2623         case KVM_REG_S390_PFTOKEN:
2624                 r = put_user(vcpu->arch.pfault_token,
2625                              (u64 __user *)reg->addr);
2626                 break;
2627         case KVM_REG_S390_PFCOMPARE:
2628                 r = put_user(vcpu->arch.pfault_compare,
2629                              (u64 __user *)reg->addr);
2630                 break;
2631         case KVM_REG_S390_PFSELECT:
2632                 r = put_user(vcpu->arch.pfault_select,
2633                              (u64 __user *)reg->addr);
2634                 break;
2635         case KVM_REG_S390_PP:
2636                 r = put_user(vcpu->arch.sie_block->pp,
2637                              (u64 __user *)reg->addr);
2638                 break;
2639         case KVM_REG_S390_GBEA:
2640                 r = put_user(vcpu->arch.sie_block->gbea,
2641                              (u64 __user *)reg->addr);
2642                 break;
2643         default:
2644                 break;
2645         }
2646
2647         return r;
2648 }
2649
2650 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2651                                            struct kvm_one_reg *reg)
2652 {
2653         int r = -EINVAL;
2654         __u64 val;
2655
2656         switch (reg->id) {
2657         case KVM_REG_S390_TODPR:
2658                 r = get_user(vcpu->arch.sie_block->todpr,
2659                              (u32 __user *)reg->addr);
2660                 break;
2661         case KVM_REG_S390_EPOCHDIFF:
2662                 r = get_user(vcpu->arch.sie_block->epoch,
2663                              (u64 __user *)reg->addr);
2664                 break;
2665         case KVM_REG_S390_CPU_TIMER:
2666                 r = get_user(val, (u64 __user *)reg->addr);
2667                 if (!r)
2668                         kvm_s390_set_cpu_timer(vcpu, val);
2669                 break;
2670         case KVM_REG_S390_CLOCK_COMP:
2671                 r = get_user(vcpu->arch.sie_block->ckc,
2672                              (u64 __user *)reg->addr);
2673                 break;
2674         case KVM_REG_S390_PFTOKEN:
2675                 r = get_user(vcpu->arch.pfault_token,
2676                              (u64 __user *)reg->addr);
2677                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2678                         kvm_clear_async_pf_completion_queue(vcpu);
2679                 break;
2680         case KVM_REG_S390_PFCOMPARE:
2681                 r = get_user(vcpu->arch.pfault_compare,
2682                              (u64 __user *)reg->addr);
2683                 break;
2684         case KVM_REG_S390_PFSELECT:
2685                 r = get_user(vcpu->arch.pfault_select,
2686                              (u64 __user *)reg->addr);
2687                 break;
2688         case KVM_REG_S390_PP:
2689                 r = get_user(vcpu->arch.sie_block->pp,
2690                              (u64 __user *)reg->addr);
2691                 break;
2692         case KVM_REG_S390_GBEA:
2693                 r = get_user(vcpu->arch.sie_block->gbea,
2694                              (u64 __user *)reg->addr);
2695                 break;
2696         default:
2697                 break;
2698         }
2699
2700         return r;
2701 }
2702
2703 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2704 {
2705         kvm_s390_vcpu_initial_reset(vcpu);
2706         return 0;
2707 }
2708
2709 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2710 {
2711         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2712         return 0;
2713 }
2714
2715 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2716 {
2717         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2718         return 0;
2719 }
2720
2721 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2722                                   struct kvm_sregs *sregs)
2723 {
2724         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2725         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2726         return 0;
2727 }
2728
2729 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2730                                   struct kvm_sregs *sregs)
2731 {
2732         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2733         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2734         return 0;
2735 }
2736
2737 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2738 {
2739         if (test_fp_ctl(fpu->fpc))
2740                 return -EINVAL;
2741         vcpu->run->s.regs.fpc = fpu->fpc;
2742         if (MACHINE_HAS_VX)
2743                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2744                                  (freg_t *) fpu->fprs);
2745         else
2746                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2747         return 0;
2748 }
2749
2750 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2751 {
2752         /* make sure we have the latest values */
2753         save_fpu_regs();
2754         if (MACHINE_HAS_VX)
2755                 convert_vx_to_fp((freg_t *) fpu->fprs,
2756                                  (__vector128 *) vcpu->run->s.regs.vrs);
2757         else
2758                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2759         fpu->fpc = vcpu->run->s.regs.fpc;
2760         return 0;
2761 }
2762
2763 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2764 {
2765         int rc = 0;
2766
2767         if (!is_vcpu_stopped(vcpu))
2768                 rc = -EBUSY;
2769         else {
2770                 vcpu->run->psw_mask = psw.mask;
2771                 vcpu->run->psw_addr = psw.addr;
2772         }
2773         return rc;
2774 }
2775
2776 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2777                                   struct kvm_translation *tr)
2778 {
2779         return -EINVAL; /* not implemented yet */
2780 }
2781
2782 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2783                               KVM_GUESTDBG_USE_HW_BP | \
2784                               KVM_GUESTDBG_ENABLE)
2785
2786 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2787                                         struct kvm_guest_debug *dbg)
2788 {
2789         int rc = 0;
2790
2791         vcpu->guest_debug = 0;
2792         kvm_s390_clear_bp_data(vcpu);
2793
2794         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2795                 return -EINVAL;
2796         if (!sclp.has_gpere)
2797                 return -EINVAL;
2798
2799         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2800                 vcpu->guest_debug = dbg->control;
2801                 /* enforce guest PER */
2802                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2803
2804                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2805                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2806         } else {
2807                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2808                 vcpu->arch.guestdbg.last_bp = 0;
2809         }
2810
2811         if (rc) {
2812                 vcpu->guest_debug = 0;
2813                 kvm_s390_clear_bp_data(vcpu);
2814                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2815         }
2816
2817         return rc;
2818 }
2819
2820 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2821                                     struct kvm_mp_state *mp_state)
2822 {
2823         /* CHECK_STOP and LOAD are not supported yet */
2824         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2825                                        KVM_MP_STATE_OPERATING;
2826 }
2827
2828 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2829                                     struct kvm_mp_state *mp_state)
2830 {
2831         int rc = 0;
2832
2833         /* user space knows about this interface - let it control the state */
2834         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2835
2836         switch (mp_state->mp_state) {
2837         case KVM_MP_STATE_STOPPED:
2838                 kvm_s390_vcpu_stop(vcpu);
2839                 break;
2840         case KVM_MP_STATE_OPERATING:
2841                 kvm_s390_vcpu_start(vcpu);
2842                 break;
2843         case KVM_MP_STATE_LOAD:
2844         case KVM_MP_STATE_CHECK_STOP:
2845                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2846         default:
2847                 rc = -ENXIO;
2848         }
2849
2850         return rc;
2851 }
2852
2853 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2854 {
2855         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2856 }
2857
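/*
 * Process all requests pending on this vcpu before (re)entering SIE: re-arm
 * the prefix ipte notifier on MMU_RELOAD, flush the guest TLB, toggle IBS,
 * enable operation exception interception, and switch CMMA interpretation
 * off/on when migration starts/stops.
 */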
2858 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2859 {
2860 retry:
2861         kvm_s390_vcpu_request_handled(vcpu);
2862         if (!kvm_request_pending(vcpu))
2863                 return 0;
2864         /*
2865          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2866          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2867          * This ensures that the ipte instruction for this request has
2868          * already finished. We might race against a second unmapper that
2869          * wants to set the blocking bit. Let's just retry the request loop.
2870          */
2871         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2872                 int rc;
2873                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2874                                           kvm_s390_get_prefix(vcpu),
2875                                           PAGE_SIZE * 2, PROT_WRITE);
2876                 if (rc) {
2877                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2878                         return rc;
2879                 }
2880                 goto retry;
2881         }
2882
2883         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2884                 vcpu->arch.sie_block->ihcpu = 0xffff;
2885                 goto retry;
2886         }
2887
2888         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2889                 if (!ibs_enabled(vcpu)) {
2890                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2891                         atomic_or(CPUSTAT_IBS,
2892                                         &vcpu->arch.sie_block->cpuflags);
2893                 }
2894                 goto retry;
2895         }
2896
2897         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2898                 if (ibs_enabled(vcpu)) {
2899                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2900                         atomic_andnot(CPUSTAT_IBS,
2901                                           &vcpu->arch.sie_block->cpuflags);
2902                 }
2903                 goto retry;
2904         }
2905
2906         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2907                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2908                 goto retry;
2909         }
2910
2911         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2912                 /*
2913                  * Disable CMMA virtualization; we will emulate the ESSA
2914                  * instruction manually, in order to provide the additional
2915                  * functionality needed for live migration.
2916                  */
2917                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2918                 goto retry;
2919         }
2920
2921         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2922                 /*
2923                  * Re-enable CMMA virtualization if CMMA is available and
2924                  * was used.
2925                  */
2926                 if (vcpu->kvm->arch.use_cmma &&
2927                     vcpu->kvm->mm->context.use_cmma)
2928                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2929                 goto retry;
2930         }
2931
2932         /* nothing to do, just clear the request */
2933         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2934
2935         return 0;
2936 }
2937
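/*
 * Set the guest TOD clock including the extended epoch index.  The guest
 * TOD is kept as an offset (epoch/epdx) from the host TOD; the new values
 * are copied into every vcpu's SIE block while all vcpus are blocked, so
 * that the guest observes a single consistent time.
 */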
2938 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2939                                  const struct kvm_s390_vm_tod_clock *gtod)
2940 {
2941         struct kvm_vcpu *vcpu;
2942         struct kvm_s390_tod_clock_ext htod;
2943         int i;
2944
2945         mutex_lock(&kvm->lock);
2946         preempt_disable();
2947
2948         get_tod_clock_ext((char *)&htod);
2949
2950         kvm->arch.epoch = gtod->tod - htod.tod;
2951         kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2952
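	/*
	 * The subtraction above wraps modulo 2^64.  If the host TOD is ahead
	 * of the requested guest TOD, the wrapped epoch is larger than
	 * gtod->tod; in that case borrow one from the epoch index.
	 */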
2953         if (kvm->arch.epoch > gtod->tod)
2954                 kvm->arch.epdx -= 1;
2955
2956         kvm_s390_vcpu_block_all(kvm);
2957         kvm_for_each_vcpu(i, vcpu, kvm) {
2958                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2959                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
2960         }
2961
2962         kvm_s390_vcpu_unblock_all(kvm);
2963         preempt_enable();
2964         mutex_unlock(&kvm->lock);
2965 }
2966
2967 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2968 {
2969         struct kvm_vcpu *vcpu;
2970         int i;
2971
2972         mutex_lock(&kvm->lock);
2973         preempt_disable();
2974         kvm->arch.epoch = tod - get_tod_clock();
2975         kvm_s390_vcpu_block_all(kvm);
2976         kvm_for_each_vcpu(i, vcpu, kvm)
2977                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2978         kvm_s390_vcpu_unblock_all(kvm);
2979         preempt_enable();
2980         mutex_unlock(&kvm->lock);
2981 }
2982
2983 /**
2984  * kvm_arch_fault_in_page - fault-in guest page if necessary
2985  * @vcpu: The corresponding virtual cpu
2986  * @gpa: Guest physical address
2987  * @writable: Whether the page should be writable or not
2988  *
2989  * Make sure that a guest page has been faulted-in on the host.
2990  *
2991  * Return: Zero on success, negative error code otherwise.
2992  */
2993 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2994 {
2995         return gmap_fault(vcpu->arch.gmap, gpa,
2996                           writable ? FAULT_FLAG_WRITE : 0);
2997 }
2998
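/*
 * Inject one side of the pfault handshake: PFAULT_INIT is delivered to the
 * faulting vcpu as a local interrupt, while PFAULT_DONE is injected as a
 * floating interrupt for the whole VM.
 */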
2999 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3000                                       unsigned long token)
3001 {
3002         struct kvm_s390_interrupt inti;
3003         struct kvm_s390_irq irq;
3004
3005         if (start_token) {
3006                 irq.u.ext.ext_params2 = token;
3007                 irq.type = KVM_S390_INT_PFAULT_INIT;
3008                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3009         } else {
3010                 inti.type = KVM_S390_INT_PFAULT_DONE;
3011                 inti.parm64 = token;
3012                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3013         }
3014 }
3015
3016 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3017                                      struct kvm_async_pf *work)
3018 {
3019         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3020         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3021 }
3022
3023 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3024                                  struct kvm_async_pf *work)
3025 {
3026         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3027         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3028 }
3029
3030 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3031                                struct kvm_async_pf *work)
3032 {
3033         /* s390 will always inject the page directly */
3034 }
3035
3036 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3037 {
3038         /*
3039          * s390 will always inject the page directly,
3040          * but we still want kvm_check_async_pf_completion() to clean up
3041          */
3042         return true;
3043 }
3044
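/*
 * Decide whether a host fault can be turned into an asynchronous pfault for
 * the guest: the guest must have set up a pfault token (via DIAGNOSE 0x258),
 * the PSW must match the configured select/compare masks, external
 * interrupts must be enabled and no other interrupt may be pending.  A zero
 * return tells the caller to fault the page in synchronously instead.
 */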
3045 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3046 {
3047         hva_t hva;
3048         struct kvm_arch_async_pf arch;
3049         int rc;
3050
3051         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3052                 return 0;
3053         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3054             vcpu->arch.pfault_compare)
3055                 return 0;
3056         if (psw_extint_disabled(vcpu))
3057                 return 0;
3058         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3059                 return 0;
3060         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3061                 return 0;
3062         if (!vcpu->arch.gmap->pfault_enabled)
3063                 return 0;
3064
3065         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3066         hva += current->thread.gmap_addr & ~PAGE_MASK;
3067         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3068                 return 0;
3069
3070         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3071         return rc;
3072 }
3073
3074 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3075 {
3076         int rc, cpuflags;
3077
3078         /*
3079          * On s390, notifications for arriving pages will be delivered
3080          * directly to the guest, but the housekeeping for completed
3081          * pfaults is handled outside the worker.
3082          */
3083         kvm_check_async_pf_completion(vcpu);
3084
3085         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3086         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3087
3088         if (need_resched())
3089                 schedule();
3090
3091         if (test_cpu_flag(CIF_MCCK_PENDING))
3092                 s390_handle_mcck();
3093
3094         if (!kvm_is_ucontrol(vcpu->kvm)) {
3095                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3096                 if (rc)
3097                         return rc;
3098         }
3099
3100         rc = kvm_s390_handle_requests(vcpu);
3101         if (rc)
3102                 return rc;
3103
3104         if (guestdbg_enabled(vcpu)) {
3105                 kvm_s390_backup_guest_per_regs(vcpu);
3106                 kvm_s390_patch_guest_per_regs(vcpu);
3107         }
3108
3109         vcpu->arch.sie_block->icptcode = 0;
3110         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3111         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3112         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3113
3114         return 0;
3115 }
3116
3117 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3118 {
3119         struct kvm_s390_pgm_info pgm_info = {
3120                 .code = PGM_ADDRESSING,
3121         };
3122         u8 opcode, ilen;
3123         int rc;
3124
3125         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3126         trace_kvm_s390_sie_fault(vcpu);
3127
3128         /*
3129          * We want to inject an addressing exception, which is defined as a
3130          * suppressing or terminating exception. However, since we came here
3131          * by a DAT access exception, the PSW still points to the faulting
3132          * instruction since DAT exceptions are nullifying. So we've got
3133          * to look up the current opcode to get the length of the instruction
3134          * to be able to forward the PSW.
3135          */
3136         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3137         ilen = insn_length(opcode);
3138         if (rc < 0) {
3139                 return rc;
3140         } else if (rc) {
3141                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3142                  * Forward by arbitrary ilc, injection will take care of
3143                  * nullification if necessary.
3144                  */
3145                 pgm_info = vcpu->arch.pgm;
3146                 ilen = 4;
3147         }
3148         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3149         kvm_s390_forward_psw(vcpu, ilen);
3150         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3151 }
3152
3153 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3154 {
3155         struct mcck_volatile_info *mcck_info;
3156         struct sie_page *sie_page;
3157
3158         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3159                    vcpu->arch.sie_block->icptcode);
3160         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3161
3162         if (guestdbg_enabled(vcpu))
3163                 kvm_s390_restore_guest_per_regs(vcpu);
3164
3165         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3166         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3167
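	/*
	 * An exit_reason of -EINTR indicates that a machine check hit while
	 * the guest was running in SIE; the volatile machine check info was
	 * saved into the sie_page by the host handler and is reinjected into
	 * the guest here.
	 */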
3168         if (exit_reason == -EINTR) {
3169                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3170                 sie_page = container_of(vcpu->arch.sie_block,
3171                                         struct sie_page, sie_block);
3172                 mcck_info = &sie_page->mcck_info;
3173                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3174                 return 0;
3175         }
3176
3177         if (vcpu->arch.sie_block->icptcode > 0) {
3178                 int rc = kvm_handle_sie_intercept(vcpu);
3179
3180                 if (rc != -EOPNOTSUPP)
3181                         return rc;
3182                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3183                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3184                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3185                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3186                 return -EREMOTE;
3187         } else if (exit_reason != -EFAULT) {
3188                 vcpu->stat.exit_null++;
3189                 return 0;
3190         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3191                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3192                 vcpu->run->s390_ucontrol.trans_exc_code =
3193                                                 current->thread.gmap_addr;
3194                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3195                 return -EREMOTE;
3196         } else if (current->thread.gmap_pfault) {
3197                 trace_kvm_s390_major_guest_pfault(vcpu);
3198                 current->thread.gmap_pfault = 0;
3199                 if (kvm_arch_setup_async_pf(vcpu))
3200                         return 0;
3201                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3202         }
3203         return vcpu_post_run_fault_in_sie(vcpu);
3204 }
3205
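/*
 * Outer run loop: prepare the vcpu, enter SIE with interrupts disabled
 * around the guest_enter/guest_exit bracket, and post-process the exit,
 * until a signal, a guest debug event or an error terminates the loop.
 * kvm->srcu is held except while the guest is actually running.
 */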
3206 static int __vcpu_run(struct kvm_vcpu *vcpu)
3207 {
3208         int rc, exit_reason;
3209
3210         /*
3211          * We try to hold kvm->srcu during most of vcpu_run (except when
3212          * running the guest), so that memslots (and other stuff) are protected
3213          */
3214         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3215
3216         do {
3217                 rc = vcpu_pre_run(vcpu);
3218                 if (rc)
3219                         break;
3220
3221                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3222                 /*
3223                  * As PF_VCPU will be used in the fault handler, there must be
3224                  * no uaccess between guest_enter and guest_exit.
3225                  */
3226                 local_irq_disable();
3227                 guest_enter_irqoff();
3228                 __disable_cpu_timer_accounting(vcpu);
3229                 local_irq_enable();
3230                 exit_reason = sie64a(vcpu->arch.sie_block,
3231                                      vcpu->run->s.regs.gprs);
3232                 local_irq_disable();
3233                 __enable_cpu_timer_accounting(vcpu);
3234                 guest_exit_irqoff();
3235                 local_irq_enable();
3236                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3237
3238                 rc = vcpu_post_run(vcpu, exit_reason);
3239         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3240
3241         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3242         return rc;