// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2017
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

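/*
 * Exit and instruction counters exported via debugfs. Each entry maps a
 * file name to the offset of the corresponding per-VCPU counter.
 */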
struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_pei", VCPU_STAT(exit_pei) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
        { "instruction_stsch", VCPU_STAT(instruction_stsch) },
        { "instruction_chsc", VCPU_STAT(instruction_chsc) },
        { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
        { "instruction_sie", VCPU_STAT(instruction_sie) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
        { "diagnose_10", VCPU_STAT(diagnose_10) },
        { "diagnose_44", VCPU_STAT(diagnose_44) },
        { "diagnose_9c", VCPU_STAT(diagnose_9c) },
        { "diagnose_258", VCPU_STAT(diagnose_258) },
        { "diagnose_308", VCPU_STAT(diagnose_308) },
        { "diagnose_500", VCPU_STAT(diagnose_500) },
        { NULL }
};

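/*
 * Layout used to parse the output of get_tod_clock_ext(): with the
 * multiple-epoch facility (139), the first byte carries the epoch index
 * and the following eight bytes the TOD clock value proper.
 */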
struct kvm_s390_tod_clock_ext {
        __u8 epoch_idx;
        __u64 tod;
        __u8 reserved[7];
} __packed;

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
        BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
        return ARRAY_SIZE(kvm_s390_fac_list_mask);
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
        /* every s390 is virtualization enabled ;-) */
        return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
                          void *v)
{
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        int i;
        unsigned long long *delta = v;

        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm->arch.epoch -= *delta;
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        vcpu->arch.sie_block->epoch -= *delta;
                        if (vcpu->arch.cputm_enabled)
                                vcpu->arch.cputm_start += *delta;
                        if (vcpu->arch.vsie_block)
                                vcpu->arch.vsie_block->epoch -= *delta;
                }
        }
        return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
        gmap_notifier.notifier_call = kvm_gmap_notifier;
        gmap_register_pte_notifier(&gmap_notifier);
        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
        gmap_register_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
                                       &kvm_clock_notifier);
        return 0;
}

void kvm_arch_hardware_unsetup(void)
{
        gmap_unregister_pte_notifier(&gmap_notifier);
        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
                                         &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
        set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

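/*
 * Ask the hardware whether a PERFORM LOCKED OPERATION function code is
 * implemented: executing PLO with bit 0x100 set in the function code is
 * the "test bit" form, which only sets the condition code (cc 0 means
 * the function is available).
 */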
static inline int plo_test_bit(unsigned char nr)
{
        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
        int cc;

        asm volatile(
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : "d" (r0)
                : "cc");
        return cc == 0;
}

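/*
 * Probe the host for the CPU (sub)functions and SIE features that KVM
 * can offer to guests: PLO, PTFF and CPACF subfunctions are queried
 * directly, while the SIE related features come from SCLP.
 */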
static void kvm_s390_cpu_feat_init(void)
{
        int i;

        for (i = 0; i < 256; ++i) {
                if (plo_test_bit(i))
                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
        }

        if (test_facility(28)) /* TOD-clock steering */
                ptff(kvm_s390_available_subfunc.ptff,
                     sizeof(kvm_s390_available_subfunc.ptff),
                     PTFF_QAF);

        if (test_facility(17)) { /* MSA */
                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmac);
                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmc);
                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.km);
                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kimd);
                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.klmd);
        }
        if (test_facility(76)) /* MSA3 */
                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pckmo);
        if (test_facility(77)) { /* MSA4 */
                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmctr);
                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmf);
                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmo);
                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pcc);
        }
        if (test_facility(57)) /* MSA5 */
                __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.ppno);

        if (test_facility(146)) /* MSA8 */
                __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kma);

        if (MACHINE_HAS_ESOP)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
        /*
         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
         */
        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
            !test_facility(3) || !nested)
                return;
        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
        if (sclp.has_64bscao)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
        if (sclp.has_siif)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
        if (sclp.has_gpere)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
        if (sclp.has_gsls)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
        if (sclp.has_ib)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
        if (sclp.has_cei)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
        if (sclp.has_ibs)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
        if (sclp.has_kss)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
        /*
         * KVM_S390_VM_CPU_FEAT_SKEY: A wrongly shadowed PTE.I bit would make
         * all skey handling functions read/set the skey from the PGSTE
         * instead of the real storage key.
         *
         * KVM_S390_VM_CPU_FEAT_CMMA: A wrongly shadowed PTE.I bit would cause
         * pages to be detected as preserved even though they are resident.
         *
         * KVM_S390_VM_CPU_FEAT_PFMFI: A wrongly shadowed PTE.I bit would
         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
         *
         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
         *
         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
         * cannot easily shadow the SCA because of the ipte lock.
         */
}

int kvm_arch_init(void *opaque)
{
        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf)
                return -ENOMEM;

        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
                debug_unregister(kvm_s390_dbf);
                return -ENOMEM;
        }

        kvm_s390_cpu_feat_init();

        /* Register floating interrupt controller interface. */
        return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
        debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
{
        if (ioctl == KVM_S390_ENABLE_SIE)
                return s390_enable_sie();
        return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
        int r;

        switch (ext) {
        case KVM_CAP_S390_PSW:
        case KVM_CAP_S390_GMAP:
        case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_CAP_S390_UCONTROL:
#endif
        case KVM_CAP_ASYNC_PF:
        case KVM_CAP_SYNC_REGS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_ENABLE_CAP_VM:
        case KVM_CAP_S390_IRQCHIP:
        case KVM_CAP_VM_ATTRIBUTES:
        case KVM_CAP_MP_STATE:
        case KVM_CAP_IMMEDIATE_EXIT:
        case KVM_CAP_S390_INJECT_IRQ:
        case KVM_CAP_S390_USER_SIGP:
        case KVM_CAP_S390_USER_STSI:
        case KVM_CAP_S390_SKEYS:
        case KVM_CAP_S390_IRQ_STATE:
        case KVM_CAP_S390_USER_INSTR0:
        case KVM_CAP_S390_CMMA_MIGRATION:
        case KVM_CAP_S390_AIS:
        case KVM_CAP_S390_AIS_MIGRATION:
                r = 1;
                break;
        case KVM_CAP_S390_MEM_OP:
                r = MEM_OP_MAX_SIZE;
                break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
                r = KVM_S390_BSCA_CPU_SLOTS;
                if (!kvm_s390_use_sca_entries())
                        r = KVM_MAX_VCPUS;
                else if (sclp.has_esca && sclp.has_64bscao)
                        r = KVM_S390_ESCA_CPU_SLOTS;
                break;
        case KVM_CAP_NR_MEMSLOTS:
                r = KVM_USER_MEM_SLOTS;
                break;
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                r = MACHINE_HAS_VX;
                break;
        case KVM_CAP_S390_RI:
                r = test_facility(64);
                break;
        case KVM_CAP_S390_GS:
                r = test_facility(133);
                break;
        default:
                r = 0;
        }
        return r;
}

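/*
 * Walk all guest pages of a memslot and transfer the per-page dirty
 * state from the host page tables into the KVM dirty bitmap.
 */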
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
                                        struct kvm_memory_slot *memslot)
{
        gfn_t cur_gfn, last_gfn;
        unsigned long address;
        struct gmap *gmap = kvm->arch.gmap;

        /* Loop over all guest pages */
        last_gfn = memslot->base_gfn + memslot->npages;
        for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
                address = gfn_to_hva_memslot(memslot, cur_gfn);

                if (test_and_clear_guest_dirty(gmap->mm, address))
                        mark_page_dirty(kvm, cur_gfn);
                if (fatal_signal_pending(current))
                        return;
                cond_resched();
        }
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
{
        int r;
        unsigned long n;
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
        int is_dirty = 0;

        if (kvm_is_ucontrol(kvm))
                return -EINVAL;

        mutex_lock(&kvm->slots_lock);

        r = -EINVAL;
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;

        slots = kvm_memslots(kvm);
        memslot = id_to_memslot(slots, log->slot);
        r = -ENOENT;
        if (!memslot->dirty_bitmap)
                goto out;

        kvm_s390_sync_dirty_log(kvm, memslot);
        r = kvm_get_dirty_log(kvm, log, &is_dirty);
        if (r)
                goto out;

        /* Clear the dirty log */
        if (is_dirty) {
                n = kvm_dirty_bitmap_bytes(memslot);
                memset(memslot->dirty_bitmap, 0, n);
        }
        r = 0;
out:
        mutex_unlock(&kvm->slots_lock);
        return r;
}

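/* Request an operation exception intercept on all VCPUs of this VM. */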
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
        unsigned int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
        }
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_IRQCHIP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
                kvm->arch.use_irqchip = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_SIGP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
                kvm->arch.user_sigp = 1;
                r = 0;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (MACHINE_HAS_VX) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
                        set_kvm_facility(kvm->arch.model.fac_list, 129);
                        if (test_facility(134)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 134);
                                set_kvm_facility(kvm->arch.model.fac_list, 134);
                        }
                        if (test_facility(135)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 135);
                                set_kvm_facility(kvm->arch.model.fac_list, 135);
                        }
                        r = 0;
                } else
                        r = -EINVAL;
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_RI:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(64)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
                        set_kvm_facility(kvm->arch.model.fac_list, 64);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_AIS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else {
                        set_kvm_facility(kvm->arch.model.fac_mask, 72);
                        set_kvm_facility(kvm->arch.model.fac_list, 72);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: AIS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_GS:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (atomic_read(&kvm->online_vcpus)) {
                        r = -EBUSY;
                } else if (test_facility(133)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 133);
                        set_kvm_facility(kvm->arch.model.fac_list, 133);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_USER_STSI:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
                kvm->arch.user_stsi = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_INSTR0:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
                kvm->arch.user_instr0 = 1;
                icpt_operexc_on_all_vcpus(kvm);
                r = 0;
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_LIMIT_SIZE:
                ret = 0;
                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
                         kvm->arch.mem_limit);
                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
                        ret = -EFAULT;
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;
        unsigned int idx;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_ENABLE_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;

                ret = -EBUSY;
                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
                mutex_lock(&kvm->lock);
                if (!kvm->created_vcpus) {
                        kvm->arch.use_cmma = 1;
                        ret = 0;
                }
                mutex_unlock(&kvm->lock);
                break;
        case KVM_S390_VM_MEM_CLR_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;
                ret = -EINVAL;
                if (!kvm->arch.use_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
                mutex_lock(&kvm->lock);
                idx = srcu_read_lock(&kvm->srcu);
                s390_reset_cmma(kvm->arch.gmap->mm);
                srcu_read_unlock(&kvm->srcu, idx);
                mutex_unlock(&kvm->lock);
                ret = 0;
                break;
        case KVM_S390_VM_MEM_LIMIT_SIZE: {
                unsigned long new_limit;

                if (kvm_is_ucontrol(kvm))
                        return -EINVAL;

                if (get_user(new_limit, (u64 __user *)attr->addr))
                        return -EFAULT;

                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
                    new_limit > kvm->arch.mem_limit)
                        return -E2BIG;

                if (!new_limit)
                        return -EINVAL;

                /* gmap_create takes last usable address */
                if (new_limit != KVM_S390_NO_MEM_LIMIT)
                        new_limit -= 1;

                ret = -EBUSY;
                mutex_lock(&kvm->lock);
                if (!kvm->created_vcpus) {
                        /* gmap_create will round the limit up */
                        struct gmap *new = gmap_create(current->mm, new_limit);

                        if (!new) {
                                ret = -ENOMEM;
                        } else {
                                gmap_remove(kvm->arch.gmap);
                                new->private = kvm;
                                kvm->arch.gmap = new;
                                ret = 0;
                        }
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
                         (void *) kvm->arch.gmap->asce);
                break;
        }
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

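/*
 * Enable or disable AES/DEA key wrapping. Enabling generates fresh
 * random wrapping key masks in the CRYCB; all VCPUs are then kicked out
 * of SIE so that their crypto setup is re-evaluated.
 */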
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_vcpu *vcpu;
        int i;

        if (!test_kvm_facility(kvm, 76))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        switch (attr->attr) {
        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                get_random_bytes(
                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                kvm->arch.crypto.aes_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                get_random_bytes(
                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                kvm->arch.crypto.dea_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                kvm->arch.crypto.aes_kw = 0;
                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                kvm->arch.crypto.dea_kw = 0;
                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
                break;
        default:
                mutex_unlock(&kvm->lock);
                return -ENXIO;
        }

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_vcpu_crypto_setup(vcpu);
                exit_sie(vcpu);
        }
        mutex_unlock(&kvm->lock);
        return 0;
}

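/* Post a synced request on every VCPU of this VM. */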
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
        int cx;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(cx, vcpu, kvm)
                kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
        struct kvm_s390_migration_state *mgs;
        struct kvm_memory_slot *ms;
        /* should be the only one */
        struct kvm_memslots *slots;
        unsigned long ram_pages;
        int slotnr;

        /* migration mode already enabled */
        if (kvm->arch.migration_state)
                return 0;

        slots = kvm_memslots(kvm);
        if (!slots || !slots->used_slots)
                return -EINVAL;

        mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
        if (!mgs)
                return -ENOMEM;
        kvm->arch.migration_state = mgs;

        if (kvm->arch.use_cmma) {
                /*
                 * Get the last slot. They should be sorted by base_gfn, so the
                 * last slot is also the one at the end of the address space.
                 * We have verified above that at least one slot is present.
                 */
                ms = slots->memslots + slots->used_slots - 1;
                /* round up so we only use full longs */
                ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
                /* allocate enough bytes to store all the bits */
                mgs->pgste_bitmap = vmalloc(ram_pages / 8);
                if (!mgs->pgste_bitmap) {
                        kfree(mgs);
                        kvm->arch.migration_state = NULL;
                        return -ENOMEM;
                }

                mgs->bitmap_size = ram_pages;
                atomic64_set(&mgs->dirty_pages, ram_pages);
                /* mark all the pages in active slots as dirty */
                for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
                        ms = slots->memslots + slotnr;
                        bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
                }

                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
        }
        return 0;
}

/*
 * Must be called with kvm->lock held to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
        struct kvm_s390_migration_state *mgs;

        /* migration mode already disabled */
        if (!kvm->arch.migration_state)
                return 0;
        mgs = kvm->arch.migration_state;
        kvm->arch.migration_state = NULL;

        if (kvm->arch.use_cmma) {
                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
                vfree(mgs->pgste_bitmap);
        }
        kfree(mgs);
        return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        int idx, res = -ENXIO;

        mutex_lock(&kvm->lock);
        switch (attr->attr) {
        case KVM_S390_VM_MIGRATION_START:
                idx = srcu_read_lock(&kvm->srcu);
                res = kvm_s390_vm_start_migration(kvm);
                srcu_read_unlock(&kvm->srcu, idx);
                break;
        case KVM_S390_VM_MIGRATION_STOP:
                res = kvm_s390_vm_stop_migration(kvm);
                break;
        default:
                break;
        }
        mutex_unlock(&kvm->lock);

        return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        u64 mig = (kvm->arch.migration_state != NULL);

        if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
                return -ENXIO;

        if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
                return -EFAULT;
        return 0;
}

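/*
 * Set the guest TOD clock. Without the multiple-epoch facility (139)
 * only an epoch index of 0 can be set, i.e. a plain 64 bit TOD value.
 */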
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;

        if (test_kvm_facility(kvm, 139))
                kvm_s390_set_tod_clock_ext(kvm, &gtod);
        else if (gtod.epoch_idx == 0)
                kvm_s390_set_tod_clock(kvm, gtod.tod);
        else
                return -EINVAL;

        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);

        return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high;

        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
                                           sizeof(gtod_high)))
                return -EFAULT;

        if (gtod_high != 0)
                return -EINVAL;
        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;

        kvm_s390_set_tod_clock(kvm, gtod);
        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
        return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_set_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_set_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_set_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

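/*
 * Compute the current guest TOD clock from the host clock plus the
 * guest epoch. If adding the epoch wrapped the 64 bit TOD value, the
 * carry is propagated into the epoch index.
 */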
static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
                                        struct kvm_s390_vm_tod_clock *gtod)
{
        struct kvm_s390_tod_clock_ext htod;

        preempt_disable();

        get_tod_clock_ext((char *)&htod);

        gtod->tod = htod.tod + kvm->arch.epoch;
        gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;

        if (gtod->tod < htod.tod)
                gtod->epoch_idx += 1;

        preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        memset(&gtod, 0, sizeof(gtod));

        if (test_kvm_facility(kvm, 139))
                kvm_s390_get_tod_clock_ext(kvm, &gtod);
        else
                gtod.tod = kvm_s390_get_tod_clock_fast(kvm);

        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;

        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);
        return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high = 0;

        if (copy_to_user((void __user *)attr->addr, &gtod_high,
                                         sizeof(gtod_high)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        gtod = kvm_s390_get_tod_clock_fast(kvm);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

        return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_get_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_get_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_get_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

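/*
 * Set the guest CPU model. The requested IBC value is clamped to the
 * range the machine supports: between the lowest and the unblocked IBC
 * reported by SCLP.
 */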
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        u16 lowest_ibc, unblocked_ibc;
        int ret = 0;

        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus) {
                ret = -EBUSY;
                goto out;
        }
        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        if (!copy_from_user(proc, (void __user *)attr->addr,
                            sizeof(*proc))) {
                kvm->arch.model.cpuid = proc->cpuid;
                lowest_ibc = sclp.ibc >> 16 & 0xfff;
                unblocked_ibc = sclp.ibc & 0xfff;
                if (lowest_ibc && proc->ibc) {
                        if (proc->ibc > unblocked_ibc)
                                kvm->arch.model.ibc = unblocked_ibc;
                        else if (proc->ibc < lowest_ibc)
                                kvm->arch.model.ibc = lowest_ibc;
                        else
                                kvm->arch.model.ibc = proc->ibc;
                }
                memcpy(kvm->arch.model.fac_list, proc->fac_list,
                       S390_ARCH_FAC_LIST_SIZE_BYTE);
                VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
                         kvm->arch.model.ibc,
                         kvm->arch.model.cpuid);
                VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
                         kvm->arch.model.fac_list[0],
                         kvm->arch.model.fac_list[1],
                         kvm->arch.model.fac_list[2]);
        } else
                ret = -EFAULT;
        kfree(proc);
out:
        mutex_unlock(&kvm->lock);
        return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
                                       struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;
        int ret = -EBUSY;

        if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
                return -EFAULT;
        if (!bitmap_subset((unsigned long *) data.feat,
                           kvm_s390_available_cpu_feat,
                           KVM_S390_VM_CPU_FEAT_NR_BITS))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        if (!atomic_read(&kvm->online_vcpus)) {
                bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
                            KVM_S390_VM_CPU_FEAT_NR_BITS);
                ret = 0;
        }
        mutex_unlock(&kvm->lock);
        return ret;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        /*
         * Once supported by kernel + hw, we have to store the subfunctions
         * in kvm->arch and remember that user space configured them.
         */
        return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret = -ENXIO;

        switch (attr->attr) {
        case KVM_S390_VM_CPU_PROCESSOR:
                ret = kvm_s390_set_processor(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                ret = kvm_s390_set_processor_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                ret = kvm_s390_set_processor_subfunc(kvm, attr);
                break;
        }
        return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        int ret = 0;

        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        proc->cpuid = kvm->arch.model.cpuid;
        proc->ibc = kvm->arch.model.ibc;
        memcpy(&proc->fac_list, kvm->arch.model.fac_list,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
                 kvm->arch.model.ibc,
                 kvm->arch.model.cpuid);
        VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
                 kvm->arch.model.fac_list[0],
                 kvm->arch.model.fac_list[1],
                 kvm->arch.model.fac_list[2]);
        if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
                ret = -EFAULT;
        kfree(proc);
out:
        return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_machine *mach;
        int ret = 0;

        mach = kzalloc(sizeof(*mach), GFP_KERNEL);
        if (!mach) {
                ret = -ENOMEM;
                goto out;
        }
        get_cpu_id((struct cpuid *) &mach->cpuid);
        mach->ibc = sclp.ibc;
        memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
               sizeof(S390_lowcore.stfle_fac_list));
        VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
                 kvm->arch.model.ibc,
                 kvm->arch.model.cpuid);
        VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
                 mach->fac_mask[0],
                 mach->fac_mask[1],
                 mach->fac_mask[2]);
        VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
                 mach->fac_list[0],
                 mach->fac_list[1],
                 mach->fac_list[2]);
        if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
                ret = -EFAULT;
        kfree(mach);
out:
        return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
                                       struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;

        bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
                    KVM_S390_VM_CPU_FEAT_NR_BITS);
        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;

        bitmap_copy((unsigned long *) data.feat,
                    kvm_s390_available_cpu_feat,
                    KVM_S390_VM_CPU_FEAT_NR_BITS);
        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        /*
         * Once we can actually configure subfunctions (kernel + hw support),
         * we have to check if they were already set by user space, if so copy
         * them from kvm->arch.
         */
        return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
                                        struct kvm_device_attr *attr)
{
        if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
            sizeof(struct kvm_s390_vm_cpu_subfunc)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret = -ENXIO;

        switch (attr->attr) {
        case KVM_S390_VM_CPU_PROCESSOR:
                ret = kvm_s390_get_processor(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE:
                ret = kvm_s390_get_machine(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                ret = kvm_s390_get_processor_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE_FEAT:
                ret = kvm_s390_get_machine_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                ret = kvm_s390_get_processor_subfunc(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
                ret = kvm_s390_get_machine_subfunc(kvm, attr);
                break;
        }
        return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                ret = kvm_s390_set_mem_control(kvm, attr);
                break;
        case KVM_S390_VM_TOD:
                ret = kvm_s390_set_tod(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MODEL:
                ret = kvm_s390_set_cpu_model(kvm, attr);
                break;
        case KVM_S390_VM_CRYPTO:
                ret = kvm_s390_vm_set_crypto(kvm, attr);
                break;
        case KVM_S390_VM_MIGRATION:
                ret = kvm_s390_vm_set_migration(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                ret = kvm_s390_get_mem_control(kvm, attr);
                break;
        case KVM_S390_VM_TOD:
                ret = kvm_s390_get_tod(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MODEL:
                ret = kvm_s390_get_cpu_model(kvm, attr);
                break;
        case KVM_S390_VM_MIGRATION:
                ret = kvm_s390_vm_get_migration(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                switch (attr->attr) {
                case KVM_S390_VM_MEM_ENABLE_CMMA:
                case KVM_S390_VM_MEM_CLR_CMMA:
                        ret = sclp.has_cmma ? 0 : -ENXIO;
                        break;
                case KVM_S390_VM_MEM_LIMIT_SIZE:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_TOD:
                switch (attr->attr) {
                case KVM_S390_VM_TOD_LOW:
                case KVM_S390_VM_TOD_HIGH:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_CPU_MODEL:
                switch (attr->attr) {
                case KVM_S390_VM_CPU_PROCESSOR:
                case KVM_S390_VM_CPU_MACHINE:
                case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                case KVM_S390_VM_CPU_MACHINE_FEAT:
                case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
                        ret = 0;
                        break;
                /* configuring subfunctions is not supported yet */
                case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_CRYPTO:
                switch (attr->attr) {
                case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_MIGRATION:
                ret = 0;
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

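/*
 * Read the storage keys for a range of guest pages into a user buffer.
 * Returns KVM_S390_GET_SKEYS_NONE if the guest does not use storage
 * keys at all.
 */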
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
        uint8_t *keys;
        uint64_t hva;
        int srcu_idx, i, r = 0;

        if (args->flags != 0)
                return -EINVAL;

        /* Is this guest using storage keys? */
        if (!mm_use_skey(current->mm))
                return KVM_S390_GET_SKEYS_NONE;

        /* Enforce sane limit on memory allocation */
        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
                return -EINVAL;

        keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
        if (!keys)
                return -ENOMEM;

        down_read(&current->mm->mmap_sem);
        srcu_idx = srcu_read_lock(&kvm->srcu);
        for (i = 0; i < args->count; i++) {
                hva = gfn_to_hva(kvm, args->start_gfn + i);
                if (kvm_is_error_hva(hva)) {
                        r = -EFAULT;
                        break;
                }

                r = get_guest_storage_key(current->mm, hva, &keys[i]);
                if (r)
                        break;
        }
        srcu_read_unlock(&kvm->srcu, srcu_idx);
        up_read(&current->mm->mmap_sem);

        if (!r) {
                r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
                                 sizeof(uint8_t) * args->count);
                if (r)
                        r = -EFAULT;
        }

        kvfree(keys);
        return r;
}

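/*
 * Set the storage keys for a range of guest pages from a user buffer,
 * enabling storage key handling for the guest if necessary.
 */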
1438 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1439 {
1440         uint8_t *keys;
1441         uint64_t hva;
1442         int srcu_idx, i, r = 0;
1443
1444         if (args->flags != 0)
1445                 return -EINVAL;
1446
1447         /* Enforce sane limit on memory allocation */
1448         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1449                 return -EINVAL;
1450
1451         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1452         if (!keys)
1453                 return -ENOMEM;
1454
1455         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1456                            sizeof(uint8_t) * args->count);
1457         if (r) {
1458                 r = -EFAULT;
1459                 goto out;
1460         }
1461
1462         /* Enable storage key handling for the guest */
1463         r = s390_enable_skey();
1464         if (r)
1465                 goto out;
1466
1467         down_read(&current->mm->mmap_sem);
1468         srcu_idx = srcu_read_lock(&kvm->srcu);
1469         for (i = 0; i < args->count; i++) {
1470                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1471                 if (kvm_is_error_hva(hva)) {
1472                         r = -EFAULT;
1473                         break;
1474                 }
1475
1476                 /* Lowest order bit is reserved */
1477                 if (keys[i] & 0x01) {
1478                         r = -EINVAL;
1479                         break;
1480                 }
1481
1482                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1483                 if (r)
1484                         break;
1485         }
1486         srcu_read_unlock(&kvm->srcu, srcu_idx);
1487         up_read(&current->mm->mmap_sem);
1488 out:
1489         kvfree(keys);
1490         return r;
1491 }
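/*
 * The store direction is symmetric; a hedged sketch, reusing buf and vm_fd
 * from the sketch above. Bit 0 of each key byte is reserved and must be
 * clear, as enforced by the loop above:
 *
 *	struct kvm_s390_skeys skeys = {
 *		.start_gfn = 0,
 *		.count = sizeof(buf),
 *		.skeydata_addr = (__u64)(unsigned long)buf,
 *	};
 *	int rc = ioctl(vm_fd, KVM_S390_SET_SKEYS, &skeys);
 */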
1492
1493 /*
1494  * Base address and length must be sent at the start of each block, so
1495  * restarting after a gap costs two longs of overhead. It is therefore
1496  * cheaper to keep sending clean data while the gap is shorter than that.
1497  */
1498 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1499 /* use the same limit as for the storage keys, for consistency */
1500 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1501
1502 /*
1503  * This function searches for the next page with dirty CMMA attributes, and
1504  * saves the attributes in the buffer up to either the end of the buffer or
1505  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1506  * no trailing clean bytes are saved.
1507  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1508  * output buffer will indicate 0 as length.
1509  */
1510 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1511                                   struct kvm_s390_cmma_log *args)
1512 {
1513         struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1514         unsigned long bufsize, hva, pgstev, i, next, cur;
1515         int srcu_idx, peek, r = 0, rr;
1516         u8 *res;
1517
1518         cur = args->start_gfn;
1519         i = next = pgstev = 0;
1520
1521         if (unlikely(!kvm->arch.use_cmma))
1522                 return -ENXIO;
1523         /* Invalid/unsupported flags were specified */
1524         if (args->flags & ~KVM_S390_CMMA_PEEK)
1525                 return -EINVAL;
1526         /* Migration mode query, and we are not doing a migration */
1527         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1528         if (!peek && !s)
1529                 return -EINVAL;
1530         /* CMMA is disabled or was not used, or the buffer has length zero */
1531         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1532         if (!bufsize || !kvm->mm->context.use_cmma) {
1533                 memset(args, 0, sizeof(*args));
1534                 return 0;
1535         }
1536
1537         if (!peek) {
1538                 /* We are not peeking, and there are no dirty pages */
1539                 if (!atomic64_read(&s->dirty_pages)) {
1540                         memset(args, 0, sizeof(*args));
1541                         return 0;
1542                 }
1543                 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1544                                     args->start_gfn);
1545                 if (cur >= s->bitmap_size)      /* nothing found, loop back */
1546                         cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1547                 if (cur >= s->bitmap_size) {    /* again! (very unlikely) */
1548                         memset(args, 0, sizeof(*args));
1549                         return 0;
1550                 }
1551                 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1552         }
1553
1554         res = vmalloc(bufsize);
1555         if (!res)
1556                 return -ENOMEM;
1557
1558         args->start_gfn = cur;
1559
1560         down_read(&kvm->mm->mmap_sem);
1561         srcu_idx = srcu_read_lock(&kvm->srcu);
1562         while (i < bufsize) {
1563                 hva = gfn_to_hva(kvm, cur);
1564                 if (kvm_is_error_hva(hva)) {
1565                         r = -EFAULT;
1566                         break;
1567                 }
1568                 /* decrement only if we actually flipped the bit to 0 */
1569                 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1570                         atomic64_dec(&s->dirty_pages);
1571                 r = get_pgste(kvm->mm, hva, &pgstev);
1572                 if (r < 0)
1573                         pgstev = 0;
1574                 /* save the value */
1575                 res[i++] = (pgstev >> 24) & 0x43;
1576                 /*
1577                  * if the next bit is too far away, stop.
1578                  * if we reached the previous "next", find the next one
1579                  */
1580                 if (!peek) {
1581                         if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1582                                 break;
1583                         if (cur == next)
1584                                 next = find_next_bit(s->pgste_bitmap,
1585                                                      s->bitmap_size, cur + 1);
1586                         /* reached the end of the bitmap or of the buffer, stop */
1587                         if ((next >= s->bitmap_size) ||
1588                             (next >= args->start_gfn + bufsize))
1589                                 break;
1590                 }
1591                 cur++;
1592         }
1593         srcu_read_unlock(&kvm->srcu, srcu_idx);
1594         up_read(&kvm->mm->mmap_sem);
1595         args->count = i;
1596         args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1597
1598         rr = copy_to_user((void __user *)args->values, res, args->count);
1599         if (rr)
1600                 r = -EFAULT;
1601
1602         vfree(res);
1603         return r;
1604 }
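/*
 * A sketch of the retrieval loop a userspace migration might run on top of
 * the handler above. Assumptions: an open VM fd "vm_fd", a buffer "buf" of
 * KVM_S390_CMMA_SIZE_MAX bytes, migration mode already enabled via the
 * KVM_S390_VM_MIGRATION attribute, and a hypothetical save_cmma_values()
 * helper standing in for the migration stream; error handling is omitted:
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.flags = 0,
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *	do {
 *		log.count = KVM_S390_CMMA_SIZE_MAX;
 *		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
 *			break;
 *		save_cmma_values(log.start_gfn, buf, log.count);
 *		log.start_gfn += log.count;
 *	} while (log.remaining);
 *
 * Setting KVM_S390_CMMA_PEEK in flags instead reads the values without
 * consuming dirty bits.
 */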
1605
1606 /*
1607  * This function sets the CMMA attributes for the given pages. If the input
1608  * buffer has zero length, no action is taken, otherwise the attributes are
1609  * set and the mm->context.use_cmma flag is set.
1610  */
1611 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1612                                   const struct kvm_s390_cmma_log *args)
1613 {
1614         unsigned long hva, mask, pgstev, i;
1615         uint8_t *bits;
1616         int srcu_idx, r = 0;
1617
1618         mask = args->mask;
1619
1620         if (!kvm->arch.use_cmma)
1621                 return -ENXIO;
1622         /* invalid/unsupported flags */
1623         if (args->flags != 0)
1624                 return -EINVAL;
1625         /* Enforce sane limit on memory allocation */
1626         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1627                 return -EINVAL;
1628         /* Nothing to do */
1629         if (args->count == 0)
1630                 return 0;
1631
1632         bits = vmalloc(sizeof(*bits) * args->count);
1633         if (!bits)
1634                 return -ENOMEM;
1635
1636         r = copy_from_user(bits, (void __user *)args->values, args->count);
1637         if (r) {
1638                 r = -EFAULT;
1639                 goto out;
1640         }
1641
1642         down_read(&kvm->mm->mmap_sem);
1643         srcu_idx = srcu_read_lock(&kvm->srcu);
1644         for (i = 0; i < args->count; i++) {
1645                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1646                 if (kvm_is_error_hva(hva)) {
1647                         r = -EFAULT;
1648                         break;
1649                 }
1650
1651                 pgstev = bits[i];
1652                 pgstev = pgstev << 24;
1653                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1654                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1655         }
1656         srcu_read_unlock(&kvm->srcu, srcu_idx);
1657         up_read(&kvm->mm->mmap_sem);
1658
1659         if (!kvm->mm->context.use_cmma) {
1660                 down_write(&kvm->mm->mmap_sem);
1661                 kvm->mm->context.use_cmma = 1;
1662                 up_write(&kvm->mm->mmap_sem);
1663         }
1664 out:
1665         vfree(bits);
1666         return r;
1667 }
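/*
 * On the destination, the logged values can be replayed through the setter
 * above; a minimal sketch under the same assumptions, with "gfn" and "n"
 * standing for a chunk produced by the sender:
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = gfn,
 *		.count = n,
 *		.mask = ~0ULL,
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &log);
 */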
1668
1669 long kvm_arch_vm_ioctl(struct file *filp,
1670                        unsigned int ioctl, unsigned long arg)
1671 {
1672         struct kvm *kvm = filp->private_data;
1673         void __user *argp = (void __user *)arg;
1674         struct kvm_device_attr attr;
1675         int r;
1676
1677         switch (ioctl) {
1678         case KVM_S390_INTERRUPT: {
1679                 struct kvm_s390_interrupt s390int;
1680
1681                 r = -EFAULT;
1682                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1683                         break;
1684                 r = kvm_s390_inject_vm(kvm, &s390int);
1685                 break;
1686         }
1687         case KVM_ENABLE_CAP: {
1688                 struct kvm_enable_cap cap;
1689                 r = -EFAULT;
1690                 if (copy_from_user(&cap, argp, sizeof(cap)))
1691                         break;
1692                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1693                 break;
1694         }
1695         case KVM_CREATE_IRQCHIP: {
1696                 struct kvm_irq_routing_entry routing;
1697
1698                 r = -EINVAL;
1699                 if (kvm->arch.use_irqchip) {
1700                         /* Set up dummy routing. */
1701                         memset(&routing, 0, sizeof(routing));
1702                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1703                 }
1704                 break;
1705         }
1706         case KVM_SET_DEVICE_ATTR: {
1707                 r = -EFAULT;
1708                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1709                         break;
1710                 r = kvm_s390_vm_set_attr(kvm, &attr);
1711                 break;
1712         }
1713         case KVM_GET_DEVICE_ATTR: {
1714                 r = -EFAULT;
1715                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1716                         break;
1717                 r = kvm_s390_vm_get_attr(kvm, &attr);
1718                 break;
1719         }
1720         case KVM_HAS_DEVICE_ATTR: {
1721                 r = -EFAULT;
1722                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1723                         break;
1724                 r = kvm_s390_vm_has_attr(kvm, &attr);
1725                 break;
1726         }
1727         case KVM_S390_GET_SKEYS: {
1728                 struct kvm_s390_skeys args;
1729
1730                 r = -EFAULT;
1731                 if (copy_from_user(&args, argp,
1732                                    sizeof(struct kvm_s390_skeys)))
1733                         break;
1734                 r = kvm_s390_get_skeys(kvm, &args);
1735                 break;
1736         }
1737         case KVM_S390_SET_SKEYS: {
1738                 struct kvm_s390_skeys args;
1739
1740                 r = -EFAULT;
1741                 if (copy_from_user(&args, argp,
1742                                    sizeof(struct kvm_s390_skeys)))
1743                         break;
1744                 r = kvm_s390_set_skeys(kvm, &args);
1745                 break;
1746         }
1747         case KVM_S390_GET_CMMA_BITS: {
1748                 struct kvm_s390_cmma_log args;
1749
1750                 r = -EFAULT;
1751                 if (copy_from_user(&args, argp, sizeof(args)))
1752                         break;
1753                 r = kvm_s390_get_cmma_bits(kvm, &args);
1754                 if (!r) {
1755                         r = copy_to_user(argp, &args, sizeof(args));
1756                         if (r)
1757                                 r = -EFAULT;
1758                 }
1759                 break;
1760         }
1761         case KVM_S390_SET_CMMA_BITS: {
1762                 struct kvm_s390_cmma_log args;
1763
1764                 r = -EFAULT;
1765                 if (copy_from_user(&args, argp, sizeof(args)))
1766                         break;
1767                 r = kvm_s390_set_cmma_bits(kvm, &args);
1768                 break;
1769         }
1770         default:
1771                 r = -ENOTTY;
1772         }
1773
1774         return r;
1775 }
1776
1777 static int kvm_s390_query_ap_config(u8 *config)
1778 {
1779         u32 fcn_code = 0x04000000UL;
1780         u32 cc = 0;
1781
1782         memset(config, 0, 128);
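        /* gr0 takes the QCI function code, gr2 the address of the config block */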
1783         asm volatile(
1784                 "lgr 0,%1\n"
1785                 "lgr 2,%2\n"
1786                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1787                 "0: ipm %0\n"
1788                 "srl %0,28\n"
1789                 "1:\n"
1790                 EX_TABLE(0b, 1b)
1791                 : "+r" (cc)
1792                 : "r" (fcn_code), "r" (config)
1793                 : "cc", "0", "2", "memory"
1794         );
1795
1796         return cc;
1797 }
1798
1799 static int kvm_s390_apxa_installed(void)
1800 {
1801         u8 config[128];
1802         int cc;
1803
1804         if (test_facility(12)) {
1805                 cc = kvm_s390_query_ap_config(config);
1806
1807                 if (cc)
1808                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1809                 else
1810                         return config[0] & 0x40;
1811         }
1812
1813         return 0;
1814 }
1815
1816 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1817 {
1818         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1819
1820         if (kvm_s390_apxa_installed())
1821                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1822         else
1823                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1824 }
1825
1826 static u64 kvm_s390_get_initial_cpuid(void)
1827 {
1828         struct cpuid cpuid;
1829
1830         get_cpu_id(&cpuid);
1831         cpuid.version = 0xff;
1832         return *((u64 *) &cpuid);
1833 }
1834
1835 static void kvm_s390_crypto_init(struct kvm *kvm)
1836 {
1837         if (!test_kvm_facility(kvm, 76))
1838                 return;
1839
1840         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1841         kvm_s390_set_crycb_format(kvm);
1842
1843         /* Enable AES/DEA protected key functions by default */
1844         kvm->arch.crypto.aes_kw = 1;
1845         kvm->arch.crypto.dea_kw = 1;
1846         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1847                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1848         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1849                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1850 }
1851
1852 static void sca_dispose(struct kvm *kvm)
1853 {
1854         if (kvm->arch.use_esca)
1855                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1856         else
1857                 free_page((unsigned long)(kvm->arch.sca));
1858         kvm->arch.sca = NULL;
1859 }
1860
1861 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1862 {
1863         gfp_t alloc_flags = GFP_KERNEL;
1864         int i, rc;
1865         char debug_name[16];
1866         static unsigned long sca_offset;
1867
1868         rc = -EINVAL;
1869 #ifdef CONFIG_KVM_S390_UCONTROL
1870         if (type & ~KVM_VM_S390_UCONTROL)
1871                 goto out_err;
1872         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1873                 goto out_err;
1874 #else
1875         if (type)
1876                 goto out_err;
1877 #endif
1878
1879         rc = s390_enable_sie();
1880         if (rc)
1881                 goto out_err;
1882
1883         rc = -ENOMEM;
1884
1885         kvm->arch.use_esca = 0; /* start with basic SCA */
1886         if (!sclp.has_64bscao)
1887                 alloc_flags |= GFP_DMA;
1888         rwlock_init(&kvm->arch.sca_lock);
1889         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1890         if (!kvm->arch.sca)
1891                 goto out_err;
1892         spin_lock(&kvm_lock);
1893         sca_offset += 16;
1894         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1895                 sca_offset = 0;
1896         kvm->arch.sca = (struct bsca_block *)
1897                         ((char *) kvm->arch.sca + sca_offset);
1898         spin_unlock(&kvm_lock);
1899
1900         sprintf(debug_name, "kvm-%u", current->pid);
1901
1902         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1903         if (!kvm->arch.dbf)
1904                 goto out_err;
1905
1906         kvm->arch.sie_page2 =
1907              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1908         if (!kvm->arch.sie_page2)
1909                 goto out_err;
1910
1911         /* Populate the facility mask initially. */
1912         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1913                sizeof(S390_lowcore.stfle_fac_list));
1914         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1915                 if (i < kvm_s390_fac_list_mask_size())
1916                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1917                 else
1918                         kvm->arch.model.fac_mask[i] = 0UL;
1919         }
1920
1921         /* Populate the facility list initially. */
1922         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1923         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1924                S390_ARCH_FAC_LIST_SIZE_BYTE);
1925
1926         /* we are always in czam mode - even on pre z14 machines */
1927         set_kvm_facility(kvm->arch.model.fac_mask, 138);
1928         set_kvm_facility(kvm->arch.model.fac_list, 138);
1929         /* we emulate STHYI in kvm */
1930         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1931         set_kvm_facility(kvm->arch.model.fac_list, 74);
1932         if (MACHINE_HAS_TLB_GUEST) {
1933                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
1934                 set_kvm_facility(kvm->arch.model.fac_list, 147);
1935         }
1936
1937         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1938         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1939
1940         kvm_s390_crypto_init(kvm);
1941
1942         mutex_init(&kvm->arch.float_int.ais_lock);
1943         kvm->arch.float_int.simm = 0;
1944         kvm->arch.float_int.nimm = 0;
1945         spin_lock_init(&kvm->arch.float_int.lock);
1946         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1947                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1948         init_waitqueue_head(&kvm->arch.ipte_wq);
1949         mutex_init(&kvm->arch.ipte_mutex);
1950
1951         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1952         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1953
1954         if (type & KVM_VM_S390_UCONTROL) {
1955                 kvm->arch.gmap = NULL;
1956                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1957         } else {
1958                 if (sclp.hamax == U64_MAX)
1959                         kvm->arch.mem_limit = TASK_SIZE_MAX;
1960                 else
1961                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1962                                                     sclp.hamax + 1);
1963                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1964                 if (!kvm->arch.gmap)
1965                         goto out_err;
1966                 kvm->arch.gmap->private = kvm;
1967                 kvm->arch.gmap->pfault_enabled = 0;
1968         }
1969
1970         kvm->arch.css_support = 0;
1971         kvm->arch.use_irqchip = 0;
1972         kvm->arch.epoch = 0;
1973
1974         spin_lock_init(&kvm->arch.start_stop_lock);
1975         kvm_s390_vsie_init(kvm);
1976         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1977
1978         return 0;
1979 out_err:
1980         free_page((unsigned long)kvm->arch.sie_page2);
1981         debug_unregister(kvm->arch.dbf);
1982         sca_dispose(kvm);
1983         KVM_EVENT(3, "creation of vm failed: %d", rc);
1984         return rc;
1985 }
1986
1987 bool kvm_arch_has_vcpu_debugfs(void)
1988 {
1989         return false;
1990 }
1991
1992 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1993 {
1994         return 0;
1995 }
1996
1997 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1998 {
1999         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2000         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2001         kvm_s390_clear_local_irqs(vcpu);
2002         kvm_clear_async_pf_completion_queue(vcpu);
2003         if (!kvm_is_ucontrol(vcpu->kvm))
2004                 sca_del_vcpu(vcpu);
2005
2006         if (kvm_is_ucontrol(vcpu->kvm))
2007                 gmap_remove(vcpu->arch.gmap);
2008
2009         if (vcpu->kvm->arch.use_cmma)
2010                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2011         free_page((unsigned long)(vcpu->arch.sie_block));
2012
2013         kvm_vcpu_uninit(vcpu);
2014         kmem_cache_free(kvm_vcpu_cache, vcpu);
2015 }
2016
2017 static void kvm_free_vcpus(struct kvm *kvm)
2018 {
2019         unsigned int i;
2020         struct kvm_vcpu *vcpu;
2021
2022         kvm_for_each_vcpu(i, vcpu, kvm)
2023                 kvm_arch_vcpu_destroy(vcpu);
2024
2025         mutex_lock(&kvm->lock);
2026         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2027                 kvm->vcpus[i] = NULL;
2028
2029         atomic_set(&kvm->online_vcpus, 0);
2030         mutex_unlock(&kvm->lock);
2031 }
2032
2033 void kvm_arch_destroy_vm(struct kvm *kvm)
2034 {
2035         kvm_free_vcpus(kvm);
2036         sca_dispose(kvm);
2037         debug_unregister(kvm->arch.dbf);
2038         free_page((unsigned long)kvm->arch.sie_page2);
2039         if (!kvm_is_ucontrol(kvm))
2040                 gmap_remove(kvm->arch.gmap);
2041         kvm_s390_destroy_adapters(kvm);
2042         kvm_s390_clear_float_irqs(kvm);
2043         kvm_s390_vsie_destroy(kvm);
2044         if (kvm->arch.migration_state) {
2045                 vfree(kvm->arch.migration_state->pgste_bitmap);
2046                 kfree(kvm->arch.migration_state);
2047         }
2048         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2049 }
2050
2051 /* Section: vcpu related */
2052 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2053 {
2054         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2055         if (!vcpu->arch.gmap)
2056                 return -ENOMEM;
2057         vcpu->arch.gmap->private = vcpu->kvm;
2058
2059         return 0;
2060 }
2061
2062 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2063 {
2064         if (!kvm_s390_use_sca_entries())
2065                 return;
2066         read_lock(&vcpu->kvm->arch.sca_lock);
2067         if (vcpu->kvm->arch.use_esca) {
2068                 struct esca_block *sca = vcpu->kvm->arch.sca;
2069
2070                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2071                 sca->cpu[vcpu->vcpu_id].sda = 0;
2072         } else {
2073                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2074
2075                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2076                 sca->cpu[vcpu->vcpu_id].sda = 0;
2077         }
2078         read_unlock(&vcpu->kvm->arch.sca_lock);
2079 }
2080
2081 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2082 {
2083         if (!kvm_s390_use_sca_entries()) {
2084                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2085
2086                 /* we still need the basic sca for the ipte control */
2087                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2088                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2089         }
2090         read_lock(&vcpu->kvm->arch.sca_lock);
2091         if (vcpu->kvm->arch.use_esca) {
2092                 struct esca_block *sca = vcpu->kvm->arch.sca;
2093
2094                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2095                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2096                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2097                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2098                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2099         } else {
2100                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2101
2102                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2103                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2104                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2105                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2106         }
2107         read_unlock(&vcpu->kvm->arch.sca_lock);
2108 }
2109
2110 /* Basic SCA to Extended SCA data copy routines */
2111 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2112 {
2113         d->sda = s->sda;
2114         d->sigp_ctrl.c = s->sigp_ctrl.c;
2115         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2116 }
2117
2118 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2119 {
2120         int i;
2121
2122         d->ipte_control = s->ipte_control;
2123         d->mcn[0] = s->mcn;
2124         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2125                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2126 }
2127
2128 static int sca_switch_to_extended(struct kvm *kvm)
2129 {
2130         struct bsca_block *old_sca = kvm->arch.sca;
2131         struct esca_block *new_sca;
2132         struct kvm_vcpu *vcpu;
2133         unsigned int vcpu_idx;
2134         u32 scaol, scaoh;
2135
2136         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2137         if (!new_sca)
2138                 return -ENOMEM;
2139
2140         scaoh = (u32)((u64)(new_sca) >> 32);
2141         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2142
2143         kvm_s390_vcpu_block_all(kvm);
2144         write_lock(&kvm->arch.sca_lock);
2145
2146         sca_copy_b_to_e(new_sca, old_sca);
2147
2148         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2149                 vcpu->arch.sie_block->scaoh = scaoh;
2150                 vcpu->arch.sie_block->scaol = scaol;
2151                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2152         }
2153         kvm->arch.sca = new_sca;
2154         kvm->arch.use_esca = 1;
2155
2156         write_unlock(&kvm->arch.sca_lock);
2157         kvm_s390_vcpu_unblock_all(kvm);
2158
2159         free_page((unsigned long)old_sca);
2160
2161         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2162                  old_sca, kvm->arch.sca);
2163         return 0;
2164 }
2165
2166 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2167 {
2168         int rc;
2169
2170         if (!kvm_s390_use_sca_entries()) {
2171                 if (id < KVM_MAX_VCPUS)
2172                         return true;
2173                 return false;
2174         }
2175         if (id < KVM_S390_BSCA_CPU_SLOTS)
2176                 return true;
2177         if (!sclp.has_esca || !sclp.has_64bscao)
2178                 return false;
2179
2180         mutex_lock(&kvm->lock);
2181         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2182         mutex_unlock(&kvm->lock);
2183
2184         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2185 }
2186
2187 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2188 {
2189         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2190         kvm_clear_async_pf_completion_queue(vcpu);
2191         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2192                                     KVM_SYNC_GPRS |
2193                                     KVM_SYNC_ACRS |
2194                                     KVM_SYNC_CRS |
2195                                     KVM_SYNC_ARCH0 |
2196                                     KVM_SYNC_PFAULT;
2197         kvm_s390_set_prefix(vcpu, 0);
2198         if (test_kvm_facility(vcpu->kvm, 64))
2199                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2200         if (test_kvm_facility(vcpu->kvm, 133))
2201                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2202         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2203          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2204          */
2205         if (MACHINE_HAS_VX)
2206                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2207         else
2208                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2209
2210         if (kvm_is_ucontrol(vcpu->kvm))
2211                 return __kvm_ucontrol_vcpu_init(vcpu);
2212
2213         return 0;
2214 }
2215
2216 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2217 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2218 {
2219         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2220         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2221         vcpu->arch.cputm_start = get_tod_clock_fast();
2222         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2223 }
2224
2225 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2226 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2227 {
2228         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2229         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2230         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2231         vcpu->arch.cputm_start = 0;
2232         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2233 }
2234
2235 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2236 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2237 {
2238         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2239         vcpu->arch.cputm_enabled = true;
2240         __start_cpu_timer_accounting(vcpu);
2241 }
2242
2243 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2244 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2245 {
2246         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2247         __stop_cpu_timer_accounting(vcpu);
2248         vcpu->arch.cputm_enabled = false;
2249 }
2250
2251 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2252 {
2253         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2254         __enable_cpu_timer_accounting(vcpu);
2255         preempt_enable();
2256 }
2257
2258 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2259 {
2260         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2261         __disable_cpu_timer_accounting(vcpu);
2262         preempt_enable();
2263 }
2264
2265 /* set the cpu timer - may only be called from the VCPU thread itself */
2266 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2267 {
2268         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2269         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2270         if (vcpu->arch.cputm_enabled)
2271                 vcpu->arch.cputm_start = get_tod_clock_fast();
2272         vcpu->arch.sie_block->cputm = cputm;
2273         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2274         preempt_enable();
2275 }
2276
2277 /* update and get the cpu timer - can also be called from other VCPU threads */
2278 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2279 {
2280         unsigned int seq;
2281         __u64 value;
2282
2283         if (unlikely(!vcpu->arch.cputm_enabled))
2284                 return vcpu->arch.sie_block->cputm;
2285
2286         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2287         do {
2288                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2289                 /*
2290                  * If the writer would ever execute a read in the critical
2291                  * section, e.g. in irq context, we have a deadlock.
2292                  */
2293                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2294                 value = vcpu->arch.sie_block->cputm;
2295                 /* if cputm_start is 0, accounting is being started/stopped */
2296                 if (likely(vcpu->arch.cputm_start))
2297                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2298         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2299         preempt_enable();
2300         return value;
2301 }
2302
2303 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2304 {
2306         gmap_enable(vcpu->arch.enabled_gmap);
2307         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2308         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2309                 __start_cpu_timer_accounting(vcpu);
2310         vcpu->cpu = cpu;
2311 }
2312
2313 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2314 {
2315         vcpu->cpu = -1;
2316         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2317                 __stop_cpu_timer_accounting(vcpu);
2318         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2319         vcpu->arch.enabled_gmap = gmap_get_enabled();
2320         gmap_disable(vcpu->arch.enabled_gmap);
2322 }
2323
2324 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2325 {
2326         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2327         vcpu->arch.sie_block->gpsw.mask = 0UL;
2328         vcpu->arch.sie_block->gpsw.addr = 0UL;
2329         kvm_s390_set_prefix(vcpu, 0);
2330         kvm_s390_set_cpu_timer(vcpu, 0);
2331         vcpu->arch.sie_block->ckc       = 0UL;
2332         vcpu->arch.sie_block->todpr     = 0;
2333         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2334         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2335         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2336         /* make sure the new fpc will be lazily loaded */
2337         save_fpu_regs();
2338         current->thread.fpu.fpc = 0;
2339         vcpu->arch.sie_block->gbea = 1;
2340         vcpu->arch.sie_block->pp = 0;
2341         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2342         kvm_clear_async_pf_completion_queue(vcpu);
2343         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2344                 kvm_s390_vcpu_stop(vcpu);
2345         kvm_s390_clear_local_irqs(vcpu);
2346 }
2347
2348 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2349 {
2350         mutex_lock(&vcpu->kvm->lock);
2351         preempt_disable();
2352         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2353         preempt_enable();
2354         mutex_unlock(&vcpu->kvm->lock);
2355         if (!kvm_is_ucontrol(vcpu->kvm)) {
2356                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2357                 sca_add_vcpu(vcpu);
2358         }
2359         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2360                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2361         /* make vcpu_load load the right gmap on the first trigger */
2362         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2363 }
2364
2365 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2366 {
2367         if (!test_kvm_facility(vcpu->kvm, 76))
2368                 return;
2369
2370         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2371
2372         if (vcpu->kvm->arch.crypto.aes_kw)
2373                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2374         if (vcpu->kvm->arch.crypto.dea_kw)
2375                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2376
2377         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2378 }
2379
2380 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2381 {
2382         free_page(vcpu->arch.sie_block->cbrlo);
2383         vcpu->arch.sie_block->cbrlo = 0;
2384 }
2385
2386 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2387 {
2388         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2389         if (!vcpu->arch.sie_block->cbrlo)
2390                 return -ENOMEM;
2391
2392         vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2393         return 0;
2394 }
2395
2396 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2397 {
2398         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2399
2400         vcpu->arch.sie_block->ibc = model->ibc;
2401         if (test_kvm_facility(vcpu->kvm, 7))
2402                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2403 }
2404
2405 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2406 {
2407         int rc = 0;
2408
2409         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2410                                                     CPUSTAT_SM |
2411                                                     CPUSTAT_STOPPED);
2412
2413         if (test_kvm_facility(vcpu->kvm, 78))
2414                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2415         else if (test_kvm_facility(vcpu->kvm, 8))
2416                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2417
2418         kvm_s390_vcpu_setup_model(vcpu);
2419
2420         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2421         if (MACHINE_HAS_ESOP)
2422                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2423         if (test_kvm_facility(vcpu->kvm, 9))
2424                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2425         if (test_kvm_facility(vcpu->kvm, 73))
2426                 vcpu->arch.sie_block->ecb |= ECB_TE;
2427
2428         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2429                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2430         if (test_kvm_facility(vcpu->kvm, 130))
2431                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2432         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2433         if (sclp.has_cei)
2434                 vcpu->arch.sie_block->eca |= ECA_CEI;
2435         if (sclp.has_ib)
2436                 vcpu->arch.sie_block->eca |= ECA_IB;
2437         if (sclp.has_siif)
2438                 vcpu->arch.sie_block->eca |= ECA_SII;
2439         if (sclp.has_sigpif)
2440                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2441         if (test_kvm_facility(vcpu->kvm, 129)) {
2442                 vcpu->arch.sie_block->eca |= ECA_VX;
2443                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2444         }
2445         if (test_kvm_facility(vcpu->kvm, 139))
2446                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2447
2448         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2449                                         | SDNXC;
2450         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2451
2452         if (sclp.has_kss)
2453                 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2454         else
2455                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2456
2457         if (vcpu->kvm->arch.use_cmma) {
2458                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2459                 if (rc)
2460                         return rc;
2461         }
2462         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2463         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2464
2465         kvm_s390_vcpu_crypto_setup(vcpu);
2466
2467         return rc;
2468 }
2469
2470 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2471                                       unsigned int id)
2472 {
2473         struct kvm_vcpu *vcpu;
2474         struct sie_page *sie_page;
2475         int rc = -EINVAL;
2476
2477         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2478                 goto out;
2479
2480         rc = -ENOMEM;
2481
2482         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2483         if (!vcpu)
2484                 goto out;
2485
2486         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2487         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2488         if (!sie_page)
2489                 goto out_free_cpu;
2490
2491         vcpu->arch.sie_block = &sie_page->sie_block;
2492         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2493
2494         /* the real guest size will always be smaller than msl */
2495         vcpu->arch.sie_block->mso = 0;
2496         vcpu->arch.sie_block->msl = sclp.hamax;
2497
2498         vcpu->arch.sie_block->icpua = id;
2499         spin_lock_init(&vcpu->arch.local_int.lock);
2500         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2501         vcpu->arch.local_int.wq = &vcpu->wq;
2502         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2503         seqcount_init(&vcpu->arch.cputm_seqcount);
2504
2505         rc = kvm_vcpu_init(vcpu, kvm, id);
2506         if (rc)
2507                 goto out_free_sie_block;
2508         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2509                  vcpu->arch.sie_block);
2510         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2511
2512         return vcpu;
2513 out_free_sie_block:
2514         free_page((unsigned long)(vcpu->arch.sie_block));
2515 out_free_cpu:
2516         kmem_cache_free(kvm_vcpu_cache, vcpu);
2517 out:
2518         return ERR_PTR(rc);
2519 }
2520
2521 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2522 {
2523         return kvm_s390_vcpu_has_irq(vcpu, 0);
2524 }
2525
2526 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2527 {
2528         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2529 }
2530
2531 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2532 {
2533         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2534         exit_sie(vcpu);
2535 }
2536
2537 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2538 {
2539         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2540 }
2541
2542 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2543 {
2544         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2545         exit_sie(vcpu);
2546 }
2547
2548 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2549 {
2550         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2551 }
2552
2553 /*
2554  * Kick a guest cpu out of SIE and wait until SIE is not running. If
2555  * the CPU is not running (e.g. waiting as idle) it returns immediately.
2556  */
2557 void exit_sie(struct kvm_vcpu *vcpu)
2558 {
2559         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2560         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2561                 cpu_relax();
2562 }
2563
2564 /* Kick a guest cpu out of SIE to process a request synchronously */
2565 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2566 {
2567         kvm_make_request(req, vcpu);
2568         kvm_s390_vcpu_request(vcpu);
2569 }
2570
2571 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2572                               unsigned long end)
2573 {
2574         struct kvm *kvm = gmap->private;
2575         struct kvm_vcpu *vcpu;
2576         unsigned long prefix;
2577         int i;
2578
2579         if (gmap_is_shadow(gmap))
2580                 return;
2581         if (start >= 1UL << 31)
2582                 /* We are only interested in prefix pages */
2583                 return;
2584         kvm_for_each_vcpu(i, vcpu, kvm) {
2585                 /* match against both prefix pages */
2586                 prefix = kvm_s390_get_prefix(vcpu);
2587                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2588                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2589                                    start, end);
2590                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2591                 }
2592         }
2593 }
2594
2595 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2596 {
2597         /* kvm common code refers to this, but never calls it */
2598         BUG();
2599         return 0;
2600 }
2601
2602 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2603                                            struct kvm_one_reg *reg)
2604 {
2605         int r = -EINVAL;
2606
2607         switch (reg->id) {
2608         case KVM_REG_S390_TODPR:
2609                 r = put_user(vcpu->arch.sie_block->todpr,
2610                              (u32 __user *)reg->addr);
2611                 break;
2612         case KVM_REG_S390_EPOCHDIFF:
2613                 r = put_user(vcpu->arch.sie_block->epoch,
2614                              (u64 __user *)reg->addr);
2615                 break;
2616         case KVM_REG_S390_CPU_TIMER:
2617                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2618                              (u64 __user *)reg->addr);
2619                 break;
2620         case KVM_REG_S390_CLOCK_COMP:
2621                 r = put_user(vcpu->arch.sie_block->ckc,
2622                              (u64 __user *)reg->addr);
2623                 break;
2624         case KVM_REG_S390_PFTOKEN:
2625                 r = put_user(vcpu->arch.pfault_token,
2626                              (u64 __user *)reg->addr);
2627                 break;
2628         case KVM_REG_S390_PFCOMPARE:
2629                 r = put_user(vcpu->arch.pfault_compare,
2630                              (u64 __user *)reg->addr);
2631                 break;
2632         case KVM_REG_S390_PFSELECT:
2633                 r = put_user(vcpu->arch.pfault_select,
2634                              (u64 __user *)reg->addr);
2635                 break;
2636         case KVM_REG_S390_PP:
2637                 r = put_user(vcpu->arch.sie_block->pp,
2638                              (u64 __user *)reg->addr);
2639                 break;
2640         case KVM_REG_S390_GBEA:
2641                 r = put_user(vcpu->arch.sie_block->gbea,
2642                              (u64 __user *)reg->addr);
2643                 break;
2644         default:
2645                 break;
2646         }
2647
2648         return r;
2649 }
2650
2651 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2652                                            struct kvm_one_reg *reg)
2653 {
2654         int r = -EINVAL;
2655         __u64 val;
2656
2657         switch (reg->id) {
2658         case KVM_REG_S390_TODPR:
2659                 r = get_user(vcpu->arch.sie_block->todpr,
2660                              (u32 __user *)reg->addr);
2661                 break;
2662         case KVM_REG_S390_EPOCHDIFF:
2663                 r = get_user(vcpu->arch.sie_block->epoch,
2664                              (u64 __user *)reg->addr);
2665                 break;
2666         case KVM_REG_S390_CPU_TIMER:
2667                 r = get_user(val, (u64 __user *)reg->addr);
2668                 if (!r)
2669                         kvm_s390_set_cpu_timer(vcpu, val);
2670                 break;
2671         case KVM_REG_S390_CLOCK_COMP:
2672                 r = get_user(vcpu->arch.sie_block->ckc,
2673                              (u64 __user *)reg->addr);
2674                 break;
2675         case KVM_REG_S390_PFTOKEN:
2676                 r = get_user(vcpu->arch.pfault_token,
2677                              (u64 __user *)reg->addr);
2678                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2679                         kvm_clear_async_pf_completion_queue(vcpu);
2680                 break;
2681         case KVM_REG_S390_PFCOMPARE:
2682                 r = get_user(vcpu->arch.pfault_compare,
2683                              (u64 __user *)reg->addr);
2684                 break;
2685         case KVM_REG_S390_PFSELECT:
2686                 r = get_user(vcpu->arch.pfault_select,
2687                              (u64 __user *)reg->addr);
2688                 break;
2689         case KVM_REG_S390_PP:
2690                 r = get_user(vcpu->arch.sie_block->pp,
2691                              (u64 __user *)reg->addr);
2692                 break;
2693         case KVM_REG_S390_GBEA:
2694                 r = get_user(vcpu->arch.sie_block->gbea,
2695                              (u64 __user *)reg->addr);
2696                 break;
2697         default:
2698                 break;
2699         }
2700
2701         return r;
2702 }
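/*
 * Both directions are reached from userspace with the KVM_GET_ONE_REG and
 * KVM_SET_ONE_REG vcpu ioctls; a hedged sketch, "vcpu_fd" assumed open,
 * that zeroes the guest CPU timer through the setter above:
 *
 *	__u64 cputm = 0;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)(unsigned long)&cputm,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
 */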
2703
2704 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2705 {
2706         kvm_s390_vcpu_initial_reset(vcpu);
2707         return 0;
2708 }
2709
2710 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2711 {
2712         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2713         return 0;
2714 }
2715
2716 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2717 {
2718         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2719         return 0;
2720 }
2721
2722 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2723                                   struct kvm_sregs *sregs)
2724 {
2725         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2726         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2727         return 0;
2728 }
2729
2730 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2731                                   struct kvm_sregs *sregs)
2732 {
2733         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2734         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2735         return 0;
2736 }
2737
2738 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2739 {
2740         if (test_fp_ctl(fpu->fpc))
2741                 return -EINVAL;
2742         vcpu->run->s.regs.fpc = fpu->fpc;
2743         if (MACHINE_HAS_VX)
2744                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2745                                  (freg_t *) fpu->fprs);
2746         else
2747                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2748         return 0;
2749 }
2750
2751 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2752 {
2753         /* make sure we have the latest values */
2754         save_fpu_regs();
2755         if (MACHINE_HAS_VX)
2756                 convert_vx_to_fp((freg_t *) fpu->fprs,
2757                                  (__vector128 *) vcpu->run->s.regs.vrs);
2758         else
2759                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2760         fpu->fpc = vcpu->run->s.regs.fpc;
2761         return 0;
2762 }
2763
2764 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2765 {
2766         int rc = 0;
2767
2768         if (!is_vcpu_stopped(vcpu)) {
2769                 rc = -EBUSY;
2770         } else {
2771                 vcpu->run->psw_mask = psw.mask;
2772                 vcpu->run->psw_addr = psw.addr;
2773         }
2774         return rc;
2775 }
2776
2777 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2778                                   struct kvm_translation *tr)
2779 {
2780         return -EINVAL; /* not implemented yet */
2781 }
2782
2783 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2784                               KVM_GUESTDBG_USE_HW_BP | \
2785                               KVM_GUESTDBG_ENABLE)
2786
2787 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2788                                         struct kvm_guest_debug *dbg)
2789 {
2790         int rc = 0;
2791
2792         vcpu->guest_debug = 0;
2793         kvm_s390_clear_bp_data(vcpu);
2794
2795         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2796                 return -EINVAL;
2797         if (!sclp.has_gpere)
2798                 return -EINVAL;
2799
2800         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2801                 vcpu->guest_debug = dbg->control;
2802                 /* enforce guest PER */
2803                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2804
2805                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2806                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2807         } else {
2808                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2809                 vcpu->arch.guestdbg.last_bp = 0;
2810         }
2811
2812         if (rc) {
2813                 vcpu->guest_debug = 0;
2814                 kvm_s390_clear_bp_data(vcpu);
2815                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2816         }
2817
2818         return rc;
2819 }
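/*
 * A sketch of how userspace turns on the PER-based single stepping handled
 * above ("vcpu_fd" assumed open); passing control = 0 disables it again:
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 */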
2820
2821 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2822                                     struct kvm_mp_state *mp_state)
2823 {
2824         /* CHECK_STOP and LOAD are not supported yet */
2825         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2826                                        KVM_MP_STATE_OPERATING;
2827 }
2828
2829 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2830                                     struct kvm_mp_state *mp_state)
2831 {
2832         int rc = 0;
2833
2834         /* user space knows about this interface - let it control the state */
2835         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2836
2837         switch (mp_state->mp_state) {
2838         case KVM_MP_STATE_STOPPED:
2839                 kvm_s390_vcpu_stop(vcpu);
2840                 break;
2841         case KVM_MP_STATE_OPERATING:
2842                 kvm_s390_vcpu_start(vcpu);
2843                 break;
2844         case KVM_MP_STATE_LOAD:
2845         case KVM_MP_STATE_CHECK_STOP:
2846                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2847         default:
2848                 rc = -ENXIO;
2849         }
2850
2851         return rc;
2852 }
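/*
 * E.g. userspace can stop a cpu through the handler above ("vcpu_fd"
 * assumed open); note that doing so also switches the VM to user
 * controlled cpu state:
 *
 *	struct kvm_mp_state st = { .mp_state = KVM_MP_STATE_STOPPED };
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &st);
 */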
2853
2854 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2855 {
2856         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2857 }
2858
2859 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2860 {
2861 retry:
2862         kvm_s390_vcpu_request_handled(vcpu);
2863         if (!kvm_request_pending(vcpu))
2864                 return 0;
2865         /*
2866          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2867          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2868          * This ensures that the ipte instruction for this request has
2869          * already finished. We might race against a second unmapper that
2870          * wants to set the blocking bit. Let's just retry the request loop.
2871          */
2872         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2873                 int rc;
2874                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2875                                           kvm_s390_get_prefix(vcpu),
2876                                           PAGE_SIZE * 2, PROT_WRITE);
2877                 if (rc) {
2878                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2879                         return rc;
2880                 }
2881                 goto retry;
2882         }
2883
2884         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2885                 vcpu->arch.sie_block->ihcpu = 0xffff;
2886                 goto retry;
2887         }
2888
2889         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2890                 if (!ibs_enabled(vcpu)) {
2891                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2892                         atomic_or(CPUSTAT_IBS,
2893                                         &vcpu->arch.sie_block->cpuflags);
2894                 }
2895                 goto retry;
2896         }
2897
2898         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2899                 if (ibs_enabled(vcpu)) {
2900                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2901                         atomic_andnot(CPUSTAT_IBS,
2902                                           &vcpu->arch.sie_block->cpuflags);
2903                 }
2904                 goto retry;
2905         }
2906
2907         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2908                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2909                 goto retry;
2910         }
2911
2912         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2913                 /*
2914                  * Disable CMMA virtualization; we will emulate the ESSA
2915                  * instruction manually, in order to provide additional
2916                  * functionalities needed for live migration.
2917                  */
2918                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2919                 goto retry;
2920         }
2921
2922         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2923                 /*
2924                  * Re-enable CMMA virtualization if CMMA is available and
2925                  * was used.
2926                  */
2927                 if ((vcpu->kvm->arch.use_cmma) &&
2928                     (vcpu->kvm->mm->context.use_cmma))
2929                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2930                 goto retry;
2931         }
2932
2933         /* nothing to do, just clear the request */
2934         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2935
2936         return 0;
2937 }
2938
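/*
 * Set the guest TOD clock on machines with the multiple-epoch facility:
 * the epoch (and epoch index) is the offset between the guest's and the
 * host's TOD clock; a borrow from the TOD subtraction carries into the
 * epoch index. All VCPUs are blocked while the new value is propagated.
 */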
void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
				const struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_vcpu *vcpu;
	struct kvm_s390_tod_clock_ext htod;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	kvm->arch.epoch = gtod->tod - htod.tod;
	kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;

	if (kvm->arch.epoch > gtod->tod)
		kvm->arch.epdx -= 1;

	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
	}

	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}

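/* Set the guest TOD clock on machines without the multiple-epoch facility. */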
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}

/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}

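/*
 * Inject a pfault token into the guest: PFAULT_INIT when the host starts
 * resolving a fault asynchronously, PFAULT_DONE once it has been resolved.
 */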
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly, but we still
	 * want check_async_completion to clean up.
	 */
	return true;
}

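/*
 * Try to set up an async pfault for the fault the host just took. Returns
 * nonzero if async handling was queued, 0 if the fault has to be resolved
 * synchronously (pfault disabled, interruptions not enabled for it, etc.).
 */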
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}

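/* Work the VCPU thread has to do before each entry into SIE. */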
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}

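/*
 * Turn a DAT fault taken inside SIE into an addressing exception for the
 * guest, forwarding the PSW past the faulting instruction first.
 */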
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction, as DAT exceptions are nullifying. So we've got to
	 * look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/*
		 * Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}

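/*
 * Post-process one SIE exit: exit_reason is the return value of sie64a(),
 * where -EINTR indicates a machine check that interrupted SIE. Returning
 * -EREMOTE hands the exit to userspace via the prepared kvm_run block.
 */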
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	struct mcck_volatile_info *mcck_info;
	struct sie_page *sie_page;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (exit_reason == -EINTR) {
		VCPU_EVENT(vcpu, 3, "%s", "machine check");
		sie_page = container_of(vcpu->arch.sie_block,
					struct sie_page, sie_block);
		mcck_info = &sie_page->mcck_info;
		kvm_s390_reinject_machine_check(vcpu, mcck_info);
		return 0;
	}

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}

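/* The inner run loop: enter SIE until a signal, debug exit or error stops us. */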
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when
	 * running the guest), so that memslots (and other stuff) are protected.
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in the fault handler, there must be
		 * no uaccess between guest_enter and guest_exit.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}

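/*
 * Transfer the register state userspace marked dirty in kvm_run into the
 * VCPU, and swap host for guest FP/vector, access and guarded-storage
 * registers before entering the guest.
 */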
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}

	kvm_run->kvm_dirty_regs = 0;
}

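/* Counterpart of sync_regs(): save guest state back into kvm_run. */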
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (MACHINE_HAS_GS) {
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		preempt_disable();
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		preempt_enable();
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
	}
}

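/* The KVM_RUN ioctl: sync registers, run the guest, store registers back. */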
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	kvm_sigset_activate(vcpu);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

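/*
 * IBS is toggled via synchronous VCPU requests; a stale request for the
 * opposite state is cancelled first so enable and disable cannot race.
 */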
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

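/* Move a VCPU out of the STOPPED state, managing IBS across all VCPUs. */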
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

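/*
 * KVM_S390_MEM_OP: read or write guest logical memory through a bounce
 * buffer, or only check accessibility when F_CHECK_ONLY is set. A positive
 * return value is a program interruption code to be injected on request.
 */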
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

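/*
 * Illustrative userspace sketch (not part of this file): a VMM would fill
 * struct kvm_s390_mem_op from <linux/kvm.h> and issue the VCPU ioctl, e.g.
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size = len,
 *		.op = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf = (__u64)(uintptr_t)buf,
 *		.ar = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *
 * where guest_addr, len, buf and vcpu_fd are the caller's own variables.
 */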
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
			ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

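/* Allow userspace to mmap the SIE control block of user-controlled VMs. */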
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end at a
	   segment boundary (1 MB). The backing memory in userland may be
	   fragmented into multiple vmas, and it is fine to mmap() and
	   munmap() within such a slot at any time after this call. */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
		mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
}

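/*
 * Build the mask of facility bits in 16-bit block i that may be reported
 * to the guest, derived from the 2-bit indication in sclp.hmfai.
 */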
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");