arch/s390/kvm/kvm-s390.c
// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2017
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};

struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				vcpu->arch.vsie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	default:
		r = 0;
	}
	return r;
}

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
					struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

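/*
 * Illustrative sketch (not part of the original file): kvm_vm_ioctl_enable_cap()
 * above is reached when user space issues KVM_ENABLE_CAP on the VM file
 * descriptor. A minimal caller enabling user-space SIGP handling could look
 * like the following; "vm_fd" is an assumption for a descriptor obtained via
 * KVM_CREATE_VM.
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_USER_SIGP,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
 *		perror("KVM_ENABLE_CAP");
 *
 * Once enabled, kvm->arch.user_sigp is set and SIGP orders that need user
 * space involvement are forwarded instead of being handled in the kernel.
 */
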
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;
	struct kvm_memory_slot *ms;
	/* should be the only one */
	struct kvm_memslots *slots;
	unsigned long ram_pages;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_state)
		return 0;

	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
	if (!mgs)
		return -ENOMEM;
	kvm->arch.migration_state = mgs;

	if (kvm->arch.use_cmma) {
		/*
		 * Get the first slot. They are reverse sorted by base_gfn, so
		 * the first slot is also the one at the end of the address
		 * space. We have verified above that at least one slot is
		 * present.
		 */
		ms = slots->memslots;
		/* round up so we only use full longs */
		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
		/* allocate enough bytes to store all the bits */
		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
		if (!mgs->pgste_bitmap) {
			kfree(mgs);
			kvm->arch.migration_state = NULL;
			return -ENOMEM;
		}

		mgs->bitmap_size = ram_pages;
		atomic64_set(&mgs->dirty_pages, ram_pages);
		/* mark all the pages in active slots as dirty */
		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
			ms = slots->memslots + slotnr;
			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
		}

		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	}
	return 0;
}

/*
 * Must be called with kvm->lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;

	/* migration mode already disabled */
	if (!kvm->arch.migration_state)
		return 0;
	mgs = kvm->arch.migration_state;
	kvm->arch.migration_state = NULL;

	if (kvm->arch.use_cmma) {
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
		vfree(mgs->pgste_bitmap);
	}
	kfree(mgs);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int idx, res = -ENXIO;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		idx = srcu_read_lock(&kvm->srcu);
		res = kvm_s390_vm_start_migration(kvm);
		srcu_read_unlock(&kvm->srcu, idx);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = (kvm->arch.migration_state != NULL);

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (test_kvm_facility(kvm, 139))
		kvm_s390_set_tod_clock_ext(kvm, &gtod);
	else if (gtod.epoch_idx == 0)
		kvm_s390_set_tod_clock(kvm, gtod.tod);
	else
		return -EINVAL;

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
					   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
					struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;

	if (gtod->tod < htod.tod)
		gtod->epoch_idx += 1;

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));

	if (test_kvm_facility(kvm, 139))
		kvm_s390_get_tod_clock_ext(kvm, &gtod);
	else
		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);

	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
					 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

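/*
 * Illustrative sketch (not part of the original file): the TOD set/get
 * handlers above are driven from user space through the VM device-attribute
 * interface, i.e. KVM_SET_DEVICE_ATTR / KVM_GET_DEVICE_ATTR on the VM file
 * descriptor. A minimal caller loading the low word of the guest TOD clock
 * might look like this; "vm_fd" and the value of "tod" are assumptions made
 * by the example.
 *
 *	__u64 tod = 0;	// value to load, hypothetical
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)&tod,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr))
 *		perror("KVM_SET_DEVICE_ATTR");
 *
 * This path ends up in kvm_s390_vm_set_attr() -> kvm_s390_set_tod() ->
 * kvm_s390_set_tod_low() above.
 */
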
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

1391 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1392 {
1393         uint8_t *keys;
1394         uint64_t hva;
1395         int srcu_idx, i, r = 0;
1396
1397         if (args->flags != 0)
1398                 return -EINVAL;
1399
1400         /* Is this guest using storage keys? */
1401         if (!mm_use_skey(current->mm))
1402                 return KVM_S390_GET_SKEYS_NONE;
1403
1404         /* Enforce sane limit on memory allocation */
1405         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1406                 return -EINVAL;
1407
1408         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1409         if (!keys)
1410                 return -ENOMEM;
1411
1412         down_read(&current->mm->mmap_sem);
1413         srcu_idx = srcu_read_lock(&kvm->srcu);
1414         for (i = 0; i < args->count; i++) {
1415                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1416                 if (kvm_is_error_hva(hva)) {
1417                         r = -EFAULT;
1418                         break;
1419                 }
1420
1421                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1422                 if (r)
1423                         break;
1424         }
1425         srcu_read_unlock(&kvm->srcu, srcu_idx);
1426         up_read(&current->mm->mmap_sem);
1427
1428         if (!r) {
1429                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1430                                  sizeof(uint8_t) * args->count);
1431                 if (r)
1432                         r = -EFAULT;
1433         }
1434
1435         kvfree(keys);
1436         return r;
1437 }
1438
1439 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1440 {
1441         uint8_t *keys;
1442         uint64_t hva;
1443         int srcu_idx, i, r = 0;
1444
1445         if (args->flags != 0)
1446                 return -EINVAL;
1447
1448         /* Enforce sane limit on memory allocation */
1449         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1450                 return -EINVAL;
1451
1452         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1453         if (!keys)
1454                 return -ENOMEM;
1455
1456         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1457                            sizeof(uint8_t) * args->count);
1458         if (r) {
1459                 r = -EFAULT;
1460                 goto out;
1461         }
1462
1463         /* Enable storage key handling for the guest */
1464         r = s390_enable_skey();
1465         if (r)
1466                 goto out;
1467
1468         down_read(&current->mm->mmap_sem);
1469         srcu_idx = srcu_read_lock(&kvm->srcu);
1470         for (i = 0; i < args->count; i++) {
1471                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1472                 if (kvm_is_error_hva(hva)) {
1473                         r = -EFAULT;
1474                         break;
1475                 }
1476
1477                 /* Lowest order bit is reserved */
1478                 if (keys[i] & 0x01) {
1479                         r = -EINVAL;
1480                         break;
1481                 }
1482
1483                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1484                 if (r)
1485                         break;
1486         }
1487         srcu_read_unlock(&kvm->srcu, srcu_idx);
1488         up_read(&current->mm->mmap_sem);
1489 out:
1490         kvfree(keys);
1491         return r;
1492 }
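
/*
 * Usage sketch for the two helpers above (illustrative only; "vm_fd" is a
 * hypothetical VM file descriptor).  Both are reached via VM ioctls handled
 * in kvm_arch_vm_ioctl() below:
 *
 *     uint8_t keys[256];
 *     struct kvm_s390_skeys args = {
 *             .start_gfn     = 0,
 *             .count         = sizeof(keys),
 *             .skeydata_addr = (uint64_t)(unsigned long)keys,
 *     };
 *     ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);  // KVM_S390_GET_SKEYS_NONE if
 *                                               // the guest never used keys
 *     ioctl(vm_fd, KVM_S390_SET_SKEYS, &args);  // keys with the reserved low
 *                                               // bit set fail with -EINVAL
 */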
1493
1494 /*
1495  * The base address and length must be sent at the start of each block, so it
1496  * is cheaper to keep sending a short run of clean data inside the current
1497  * block than to start a new one, as long as the run is smaller than two longs.
1498  */
1499 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1500 /* for consistency */
1501 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
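
/*
 * Worked example: on s390x sizeof(void *) is 8, so KVM_S390_MAX_BIT_DISTANCE
 * is 16, matching the 16-byte start_gfn/count header of a new block.  Each
 * CMMA value is one byte, so a dirty page within that distance is kept in the
 * same block and the clean pages in between are emitted as zero values, which
 * costs no more than paying for a new header; only a larger distance ends the
 * current block (see the distance check in kvm_s390_get_cmma_bits() below).
 */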
1502
1503 /*
1504  * This function searches for the next page with dirty CMMA attributes, and
1505  * saves the attributes in the buffer up to either the end of the buffer or
1506  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1507  * no trailing clean bytes are saved.
1508  * If no dirty bits are found, or if CMMA was not enabled or never used, the
1509  * returned length (args->count) is 0.
1510  */
1511 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1512                                   struct kvm_s390_cmma_log *args)
1513 {
1514         struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1515         unsigned long bufsize, hva, pgstev, i, next, cur;
1516         int srcu_idx, peek, r = 0, rr;
1517         u8 *res;
1518
1519         cur = args->start_gfn;
1520         i = next = pgstev = 0;
1521
1522         if (unlikely(!kvm->arch.use_cmma))
1523                 return -ENXIO;
1524         /* Invalid/unsupported flags were specified */
1525         if (args->flags & ~KVM_S390_CMMA_PEEK)
1526                 return -EINVAL;
1527         /* Not a peek request, but migration mode was not enabled */
1528         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1529         if (!peek && !s)
1530                 return -EINVAL;
1531         /* CMMA is disabled or was not used, or the buffer has length zero */
1532         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1533         if (!bufsize || !kvm->mm->context.use_cmma) {
1534                 memset(args, 0, sizeof(*args));
1535                 return 0;
1536         }
1537
1538         if (!peek) {
1539                 /* We are not peeking, and there are no dirty pages */
1540                 if (!atomic64_read(&s->dirty_pages)) {
1541                         memset(args, 0, sizeof(*args));
1542                         return 0;
1543                 }
1544                 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1545                                     args->start_gfn);
1546                 if (cur >= s->bitmap_size)      /* nothing found, loop back */
1547                         cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1548                 if (cur >= s->bitmap_size) {    /* again! (very unlikely) */
1549                         memset(args, 0, sizeof(*args));
1550                         return 0;
1551                 }
1552                 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1553         }
1554
1555         res = vmalloc(bufsize);
1556         if (!res)
1557                 return -ENOMEM;
1558
1559         args->start_gfn = cur;
1560
1561         down_read(&kvm->mm->mmap_sem);
1562         srcu_idx = srcu_read_lock(&kvm->srcu);
1563         while (i < bufsize) {
1564                 hva = gfn_to_hva(kvm, cur);
1565                 if (kvm_is_error_hva(hva)) {
1566                         r = -EFAULT;
1567                         break;
1568                 }
1569                 /* decrement only if we actually flipped the bit to 0 */
1570                 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1571                         atomic64_dec(&s->dirty_pages);
1572                 r = get_pgste(kvm->mm, hva, &pgstev);
1573                 if (r < 0)
1574                         pgstev = 0;
1575                 /* save the value */
1576                 res[i++] = (pgstev >> 24) & 0x43;
1577                 /*
1578                  * if the next bit is too far away, stop.
1579                  * if we reached the previous "next", find the next one
1580                  */
1581                 if (!peek) {
1582                         if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1583                                 break;
1584                         if (cur == next)
1585                                 next = find_next_bit(s->pgste_bitmap,
1586                                                      s->bitmap_size, cur + 1);
1587                         /* reached the end of the bitmap or of the buffer, stop */
1588                         if ((next >= s->bitmap_size) ||
1589                             (next >= args->start_gfn + bufsize))
1590                                 break;
1591                 }
1592                 cur++;
1593         }
1594         srcu_read_unlock(&kvm->srcu, srcu_idx);
1595         up_read(&kvm->mm->mmap_sem);
1596         args->count = i;
1597         args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1598
1599         rr = copy_to_user((void __user *)args->values, res, args->count);
1600         if (rr)
1601                 r = -EFAULT;
1602
1603         vfree(res);
1604         return r;
1605 }
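
/*
 * Migration-side sketch (illustrative only; "vm_fd" is a hypothetical VM file
 * descriptor): the source side typically pulls CMMA values in a loop until
 * "remaining" drops to zero, and replays them on the destination with
 * KVM_S390_SET_CMMA_BITS:
 *
 *     uint8_t values[4096];
 *     struct kvm_s390_cmma_log log = {
 *             .start_gfn = 0,
 *             .count     = sizeof(values),
 *             .flags     = 0,
 *             .values    = (uint64_t)(unsigned long)values,
 *     };
 *     do {
 *             if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
 *                     break;
 *             // transfer log.start_gfn, log.count and the values[] payload,
 *             // then continue after the range that was just returned
 *             log.start_gfn += log.count;
 *             log.count = sizeof(values);
 *     } while (log.remaining);
 */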
1606
1607 /*
1608  * This function sets the CMMA attributes for the given pages. If the input
1609  * buffer has zero length, no action is taken, otherwise the attributes are
1610  * set and the mm->context.use_cmma flag is set.
1611  */
1612 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1613                                   const struct kvm_s390_cmma_log *args)
1614 {
1615         unsigned long hva, mask, pgstev, i;
1616         uint8_t *bits;
1617         int srcu_idx, r = 0;
1618
1619         mask = args->mask;
1620
1621         if (!kvm->arch.use_cmma)
1622                 return -ENXIO;
1623         /* invalid/unsupported flags */
1624         if (args->flags != 0)
1625                 return -EINVAL;
1626         /* Enforce sane limit on memory allocation */
1627         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1628                 return -EINVAL;
1629         /* Nothing to do */
1630         if (args->count == 0)
1631                 return 0;
1632
1633         bits = vmalloc(sizeof(*bits) * args->count);
1634         if (!bits)
1635                 return -ENOMEM;
1636
1637         r = copy_from_user(bits, (void __user *)args->values, args->count);
1638         if (r) {
1639                 r = -EFAULT;
1640                 goto out;
1641         }
1642
1643         down_read(&kvm->mm->mmap_sem);
1644         srcu_idx = srcu_read_lock(&kvm->srcu);
1645         for (i = 0; i < args->count; i++) {
1646                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1647                 if (kvm_is_error_hva(hva)) {
1648                         r = -EFAULT;
1649                         break;
1650                 }
1651
1652                 pgstev = bits[i];
1653                 pgstev = pgstev << 24;
1654                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1655                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1656         }
1657         srcu_read_unlock(&kvm->srcu, srcu_idx);
1658         up_read(&kvm->mm->mmap_sem);
1659
1660         if (!kvm->mm->context.use_cmma) {
1661                 down_write(&kvm->mm->mmap_sem);
1662                 kvm->mm->context.use_cmma = 1;
1663                 up_write(&kvm->mm->mmap_sem);
1664         }
1665 out:
1666         vfree(bits);
1667         return r;
1668 }
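
/*
 * Note on the mask handling above: args->mask selects which PGSTE bits the
 * caller wants to change, but it is always intersected with
 * _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT, so only the usage state and the
 * NODAT indication can be modified through this interface, regardless of what
 * userspace passes in.
 */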
1669
1670 long kvm_arch_vm_ioctl(struct file *filp,
1671                        unsigned int ioctl, unsigned long arg)
1672 {
1673         struct kvm *kvm = filp->private_data;
1674         void __user *argp = (void __user *)arg;
1675         struct kvm_device_attr attr;
1676         int r;
1677
1678         switch (ioctl) {
1679         case KVM_S390_INTERRUPT: {
1680                 struct kvm_s390_interrupt s390int;
1681
1682                 r = -EFAULT;
1683                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1684                         break;
1685                 r = kvm_s390_inject_vm(kvm, &s390int);
1686                 break;
1687         }
1688         case KVM_ENABLE_CAP: {
1689                 struct kvm_enable_cap cap;
1690                 r = -EFAULT;
1691                 if (copy_from_user(&cap, argp, sizeof(cap)))
1692                         break;
1693                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1694                 break;
1695         }
1696         case KVM_CREATE_IRQCHIP: {
1697                 struct kvm_irq_routing_entry routing;
1698
1699                 r = -EINVAL;
1700                 if (kvm->arch.use_irqchip) {
1701                         /* Set up dummy routing. */
1702                         memset(&routing, 0, sizeof(routing));
1703                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1704                 }
1705                 break;
1706         }
1707         case KVM_SET_DEVICE_ATTR: {
1708                 r = -EFAULT;
1709                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1710                         break;
1711                 r = kvm_s390_vm_set_attr(kvm, &attr);
1712                 break;
1713         }
1714         case KVM_GET_DEVICE_ATTR: {
1715                 r = -EFAULT;
1716                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1717                         break;
1718                 r = kvm_s390_vm_get_attr(kvm, &attr);
1719                 break;
1720         }
1721         case KVM_HAS_DEVICE_ATTR: {
1722                 r = -EFAULT;
1723                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1724                         break;
1725                 r = kvm_s390_vm_has_attr(kvm, &attr);
1726                 break;
1727         }
1728         case KVM_S390_GET_SKEYS: {
1729                 struct kvm_s390_skeys args;
1730
1731                 r = -EFAULT;
1732                 if (copy_from_user(&args, argp,
1733                                    sizeof(struct kvm_s390_skeys)))
1734                         break;
1735                 r = kvm_s390_get_skeys(kvm, &args);
1736                 break;
1737         }
1738         case KVM_S390_SET_SKEYS: {
1739                 struct kvm_s390_skeys args;
1740
1741                 r = -EFAULT;
1742                 if (copy_from_user(&args, argp,
1743                                    sizeof(struct kvm_s390_skeys)))
1744                         break;
1745                 r = kvm_s390_set_skeys(kvm, &args);
1746                 break;
1747         }
1748         case KVM_S390_GET_CMMA_BITS: {
1749                 struct kvm_s390_cmma_log args;
1750
1751                 r = -EFAULT;
1752                 if (copy_from_user(&args, argp, sizeof(args)))
1753                         break;
1754                 r = kvm_s390_get_cmma_bits(kvm, &args);
1755                 if (!r) {
1756                         r = copy_to_user(argp, &args, sizeof(args));
1757                         if (r)
1758                                 r = -EFAULT;
1759                 }
1760                 break;
1761         }
1762         case KVM_S390_SET_CMMA_BITS: {
1763                 struct kvm_s390_cmma_log args;
1764
1765                 r = -EFAULT;
1766                 if (copy_from_user(&args, argp, sizeof(args)))
1767                         break;
1768                 r = kvm_s390_set_cmma_bits(kvm, &args);
1769                 break;
1770         }
1771         default:
1772                 r = -ENOTTY;
1773         }
1774
1775         return r;
1776 }
1777
1778 static int kvm_s390_query_ap_config(u8 *config)
1779 {
1780         u32 fcn_code = 0x04000000UL;
1781         u32 cc = 0;
1782
1783         memset(config, 0, 128);
1784         asm volatile(
1785                 "lgr 0,%1\n"
1786                 "lgr 2,%2\n"
1787                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1788                 "0: ipm %0\n"
1789                 "srl %0,28\n"
1790                 "1:\n"
1791                 EX_TABLE(0b, 1b)
1792                 : "+r" (cc)
1793                 : "r" (fcn_code), "r" (config)
1794                 : "cc", "0", "2", "memory"
1795         );
1796
1797         return cc;
1798 }
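
/*
 * Reading of the inline assembly above: the QCI function code (0x04000000) is
 * loaded into register 0 and the address of the 128-byte config buffer into
 * register 2, then the hardcoded opcode 0xb2af0000 issues PQAP(QCI).  The
 * resulting condition code is extracted with ipm/srl into "cc", and the
 * EX_TABLE entry provides a fixup so that a program exception raised by the
 * instruction is tolerated rather than fatal; the caller only looks at the
 * returned cc and the (zero-initialized) config buffer.
 */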
1799
1800 static int kvm_s390_apxa_installed(void)
1801 {
1802         u8 config[128];
1803         int cc;
1804
1805         if (test_facility(12)) {
1806                 cc = kvm_s390_query_ap_config(config);
1807
1808                 if (cc)
1809                         pr_err("PQAP(QCI) failed with cc=%d\n", cc);
1810                 else
1811                         return config[0] & 0x40;
1812         }
1813
1814         return 0;
1815 }
1816
1817 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1818 {
1819         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1820
1821         if (kvm_s390_apxa_installed())
1822                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1823         else
1824                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1825 }
1826
1827 static u64 kvm_s390_get_initial_cpuid(void)
1828 {
1829         struct cpuid cpuid;
1830
1831         get_cpu_id(&cpuid);
1832         cpuid.version = 0xff;
1833         return *((u64 *) &cpuid);
1834 }
1835
1836 static void kvm_s390_crypto_init(struct kvm *kvm)
1837 {
1838         if (!test_kvm_facility(kvm, 76))
1839                 return;
1840
1841         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1842         kvm_s390_set_crycb_format(kvm);
1843
1844         /* Enable AES/DEA protected key functions by default */
1845         kvm->arch.crypto.aes_kw = 1;
1846         kvm->arch.crypto.dea_kw = 1;
1847         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1848                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1849         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1850                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1851 }
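
/*
 * Note: the AES/DEA wrapping key masks are filled with fresh random data for
 * every VM, presumably so that protected-key material wrapped inside one
 * guest is not usable in any other guest or on the host.
 */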
1852
1853 static void sca_dispose(struct kvm *kvm)
1854 {
1855         if (kvm->arch.use_esca)
1856                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1857         else
1858                 free_page((unsigned long)(kvm->arch.sca));
1859         kvm->arch.sca = NULL;
1860 }
1861
1862 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1863 {
1864         gfp_t alloc_flags = GFP_KERNEL;
1865         int i, rc;
1866         char debug_name[16];
1867         static unsigned long sca_offset;
1868
1869         rc = -EINVAL;
1870 #ifdef CONFIG_KVM_S390_UCONTROL
1871         if (type & ~KVM_VM_S390_UCONTROL)
1872                 goto out_err;
1873         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1874                 goto out_err;
1875 #else
1876         if (type)
1877                 goto out_err;
1878 #endif
1879
1880         rc = s390_enable_sie();
1881         if (rc)
1882                 goto out_err;
1883
1884         rc = -ENOMEM;
1885
1886         kvm->arch.use_esca = 0; /* start with basic SCA */
1887         if (!sclp.has_64bscao)
1888                 alloc_flags |= GFP_DMA;
1889         rwlock_init(&kvm->arch.sca_lock);
1890         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1891         if (!kvm->arch.sca)
1892                 goto out_err;
1893         spin_lock(&kvm_lock);
1894         sca_offset += 16;
1895         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1896                 sca_offset = 0;
1897         kvm->arch.sca = (struct bsca_block *)
1898                         ((char *) kvm->arch.sca + sca_offset);
1899         spin_unlock(&kvm_lock);
1900
1901         sprintf(debug_name, "kvm-%u", current->pid);
1902
1903         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1904         if (!kvm->arch.dbf)
1905                 goto out_err;
1906
1907         kvm->arch.sie_page2 =
1908              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1909         if (!kvm->arch.sie_page2)
1910                 goto out_err;
1911
1912         /* Populate the facility mask initially. */
1913         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1914                sizeof(S390_lowcore.stfle_fac_list));
1915         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1916                 if (i < kvm_s390_fac_list_mask_size())
1917                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1918                 else
1919                         kvm->arch.model.fac_mask[i] = 0UL;
1920         }
1921
1922         /* Populate the facility list initially. */
1923         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1924         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1925                S390_ARCH_FAC_LIST_SIZE_BYTE);
1926
1927         /* we are always in czam mode - even on pre z14 machines */
1928         set_kvm_facility(kvm->arch.model.fac_mask, 138);
1929         set_kvm_facility(kvm->arch.model.fac_list, 138);
1930         /* we emulate STHYI in kvm */
1931         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1932         set_kvm_facility(kvm->arch.model.fac_list, 74);
1933         if (MACHINE_HAS_TLB_GUEST) {
1934                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
1935                 set_kvm_facility(kvm->arch.model.fac_list, 147);
1936         }
1937
1938         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1939         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1940
1941         kvm_s390_crypto_init(kvm);
1942
1943         mutex_init(&kvm->arch.float_int.ais_lock);
1944         kvm->arch.float_int.simm = 0;
1945         kvm->arch.float_int.nimm = 0;
1946         spin_lock_init(&kvm->arch.float_int.lock);
1947         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1948                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1949         init_waitqueue_head(&kvm->arch.ipte_wq);
1950         mutex_init(&kvm->arch.ipte_mutex);
1951
1952         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1953         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1954
1955         if (type & KVM_VM_S390_UCONTROL) {
1956                 kvm->arch.gmap = NULL;
1957                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1958         } else {
1959                 if (sclp.hamax == U64_MAX)
1960                         kvm->arch.mem_limit = TASK_SIZE_MAX;
1961                 else
1962                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1963                                                     sclp.hamax + 1);
1964                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1965                 if (!kvm->arch.gmap)
1966                         goto out_err;
1967                 kvm->arch.gmap->private = kvm;
1968                 kvm->arch.gmap->pfault_enabled = 0;
1969         }
1970
1971         kvm->arch.css_support = 0;
1972         kvm->arch.use_irqchip = 0;
1973         kvm->arch.epoch = 0;
1974
1975         spin_lock_init(&kvm->arch.start_stop_lock);
1976         kvm_s390_vsie_init(kvm);
1977         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1978
1979         return 0;
1980 out_err:
1981         free_page((unsigned long)kvm->arch.sie_page2);
1982         debug_unregister(kvm->arch.dbf);
1983         sca_dispose(kvm);
1984         KVM_EVENT(3, "creation of vm failed: %d", rc);
1985         return rc;
1986 }
1987
1988 bool kvm_arch_has_vcpu_debugfs(void)
1989 {
1990         return false;
1991 }
1992
1993 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1994 {
1995         return 0;
1996 }
1997
1998 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1999 {
2000         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2001         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2002         kvm_s390_clear_local_irqs(vcpu);
2003         kvm_clear_async_pf_completion_queue(vcpu);
2004         if (!kvm_is_ucontrol(vcpu->kvm))
2005                 sca_del_vcpu(vcpu);
2006
2007         if (kvm_is_ucontrol(vcpu->kvm))
2008                 gmap_remove(vcpu->arch.gmap);
2009
2010         if (vcpu->kvm->arch.use_cmma)
2011                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2012         free_page((unsigned long)(vcpu->arch.sie_block));
2013
2014         kvm_vcpu_uninit(vcpu);
2015         kmem_cache_free(kvm_vcpu_cache, vcpu);
2016 }
2017
2018 static void kvm_free_vcpus(struct kvm *kvm)
2019 {
2020         unsigned int i;
2021         struct kvm_vcpu *vcpu;
2022
2023         kvm_for_each_vcpu(i, vcpu, kvm)
2024                 kvm_arch_vcpu_destroy(vcpu);
2025
2026         mutex_lock(&kvm->lock);
2027         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2028                 kvm->vcpus[i] = NULL;
2029
2030         atomic_set(&kvm->online_vcpus, 0);
2031         mutex_unlock(&kvm->lock);
2032 }
2033
2034 void kvm_arch_destroy_vm(struct kvm *kvm)
2035 {
2036         kvm_free_vcpus(kvm);
2037         sca_dispose(kvm);
2038         debug_unregister(kvm->arch.dbf);
2039         free_page((unsigned long)kvm->arch.sie_page2);
2040         if (!kvm_is_ucontrol(kvm))
2041                 gmap_remove(kvm->arch.gmap);
2042         kvm_s390_destroy_adapters(kvm);
2043         kvm_s390_clear_float_irqs(kvm);
2044         kvm_s390_vsie_destroy(kvm);
2045         if (kvm->arch.migration_state) {
2046                 vfree(kvm->arch.migration_state->pgste_bitmap);
2047                 kfree(kvm->arch.migration_state);
2048         }
2049         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2050 }
2051
2052 /* Section: vcpu related */
2053 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2054 {
2055         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2056         if (!vcpu->arch.gmap)
2057                 return -ENOMEM;
2058         vcpu->arch.gmap->private = vcpu->kvm;
2059
2060         return 0;
2061 }
2062
2063 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2064 {
2065         if (!kvm_s390_use_sca_entries())
2066                 return;
2067         read_lock(&vcpu->kvm->arch.sca_lock);
2068         if (vcpu->kvm->arch.use_esca) {
2069                 struct esca_block *sca = vcpu->kvm->arch.sca;
2070
2071                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2072                 sca->cpu[vcpu->vcpu_id].sda = 0;
2073         } else {
2074                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2075
2076                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2077                 sca->cpu[vcpu->vcpu_id].sda = 0;
2078         }
2079         read_unlock(&vcpu->kvm->arch.sca_lock);
2080 }
2081
2082 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2083 {
2084         if (!kvm_s390_use_sca_entries()) {
2085                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2086
2087                 /* we still need the basic sca for the ipte control */
2088                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2089                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2090         }
2091         read_lock(&vcpu->kvm->arch.sca_lock);
2092         if (vcpu->kvm->arch.use_esca) {
2093                 struct esca_block *sca = vcpu->kvm->arch.sca;
2094
2095                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2096                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2097                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2098                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2099                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2100         } else {
2101                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2102
2103                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2104                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2105                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2106                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2107         }
2108         read_unlock(&vcpu->kvm->arch.sca_lock);
2109 }
2110
2111 /* Basic SCA to Extended SCA data copy routines */
2112 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2113 {
2114         d->sda = s->sda;
2115         d->sigp_ctrl.c = s->sigp_ctrl.c;
2116         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2117 }
2118
2119 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2120 {
2121         int i;
2122
2123         d->ipte_control = s->ipte_control;
2124         d->mcn[0] = s->mcn;
2125         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2126                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2127 }
2128
2129 static int sca_switch_to_extended(struct kvm *kvm)
2130 {
2131         struct bsca_block *old_sca = kvm->arch.sca;
2132         struct esca_block *new_sca;
2133         struct kvm_vcpu *vcpu;
2134         unsigned int vcpu_idx;
2135         u32 scaol, scaoh;
2136
2137         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2138         if (!new_sca)
2139                 return -ENOMEM;
2140
2141         scaoh = (u32)((u64)(new_sca) >> 32);
2142         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2143
2144         kvm_s390_vcpu_block_all(kvm);
2145         write_lock(&kvm->arch.sca_lock);
2146
2147         sca_copy_b_to_e(new_sca, old_sca);
2148
2149         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2150                 vcpu->arch.sie_block->scaoh = scaoh;
2151                 vcpu->arch.sie_block->scaol = scaol;
2152                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2153         }
2154         kvm->arch.sca = new_sca;
2155         kvm->arch.use_esca = 1;
2156
2157         write_unlock(&kvm->arch.sca_lock);
2158         kvm_s390_vcpu_unblock_all(kvm);
2159
2160         free_page((unsigned long)old_sca);
2161
2162         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2163                  old_sca, kvm->arch.sca);
2164         return 0;
2165 }
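
/*
 * Rationale sketch for the switch above: the basic SCA only has
 * KVM_S390_BSCA_CPU_SLOTS entries, so the first vcpu id that does not fit
 * triggers a one-time conversion to the extended SCA (see sca_can_add_vcpu()
 * below).  All vcpus are blocked and sca_lock is held for writing while the
 * SIE blocks are repointed, so no vcpu can re-enter SIE with a stale
 * scaoh/scaol pair.
 */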
2166
2167 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2168 {
2169         int rc;
2170
2171         if (!kvm_s390_use_sca_entries()) {
2172                 if (id < KVM_MAX_VCPUS)
2173                         return true;
2174                 return false;
2175         }
2176         if (id < KVM_S390_BSCA_CPU_SLOTS)
2177                 return true;
2178         if (!sclp.has_esca || !sclp.has_64bscao)
2179                 return false;
2180
2181         mutex_lock(&kvm->lock);
2182         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2183         mutex_unlock(&kvm->lock);
2184
2185         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2186 }
2187
2188 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2189 {
2190         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2191         kvm_clear_async_pf_completion_queue(vcpu);
2192         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2193                                     KVM_SYNC_GPRS |
2194                                     KVM_SYNC_ACRS |
2195                                     KVM_SYNC_CRS |
2196                                     KVM_SYNC_ARCH0 |
2197                                     KVM_SYNC_PFAULT;
2198         kvm_s390_set_prefix(vcpu, 0);
2199         if (test_kvm_facility(vcpu->kvm, 64))
2200                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2201         if (test_kvm_facility(vcpu->kvm, 133))
2202                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2203         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2204          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2205          */
2206         if (MACHINE_HAS_VX)
2207                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2208         else
2209                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2210
2211         if (kvm_is_ucontrol(vcpu->kvm))
2212                 return __kvm_ucontrol_vcpu_init(vcpu);
2213
2214         return 0;
2215 }
2216
2217 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2218 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2219 {
2220         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2221         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2222         vcpu->arch.cputm_start = get_tod_clock_fast();
2223         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2224 }
2225
2226 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2227 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2228 {
2229         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2230         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2231         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2232         vcpu->arch.cputm_start = 0;
2233         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2234 }
2235
2236 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2237 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2238 {
2239         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2240         vcpu->arch.cputm_enabled = true;
2241         __start_cpu_timer_accounting(vcpu);
2242 }
2243
2244 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2245 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2246 {
2247         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2248         __stop_cpu_timer_accounting(vcpu);
2249         vcpu->arch.cputm_enabled = false;
2250 }
2251
2252 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2253 {
2254         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2255         __enable_cpu_timer_accounting(vcpu);
2256         preempt_enable();
2257 }
2258
2259 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2260 {
2261         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2262         __disable_cpu_timer_accounting(vcpu);
2263         preempt_enable();
2264 }
2265
2266 /* set the cpu timer - may only be called from the VCPU thread itself */
2267 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2268 {
2269         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2270         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2271         if (vcpu->arch.cputm_enabled)
2272                 vcpu->arch.cputm_start = get_tod_clock_fast();
2273         vcpu->arch.sie_block->cputm = cputm;
2274         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2275         preempt_enable();
2276 }
2277
2278 /* update and get the cpu timer - can also be called from other VCPU threads */
2279 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2280 {
2281         unsigned int seq;
2282         __u64 value;
2283
2284         if (unlikely(!vcpu->arch.cputm_enabled))
2285                 return vcpu->arch.sie_block->cputm;
2286
2287         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2288         do {
2289                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2290                 /*
2291                  * If the writer would ever execute a read in the critical
2292                  * section, e.g. in irq context, we have a deadlock.
2293                  */
2294                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2295                 value = vcpu->arch.sie_block->cputm;
2296                 /* if cputm_start is 0, accounting is being started/stopped */
2297                 if (likely(vcpu->arch.cputm_start))
2298                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2299         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2300         preempt_enable();
2301         return value;
2302 }
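
/*
 * Note on the retry loop above: read_seqcount_retry() is called with the low
 * bit of the snapshot masked off, so a read that started while a writer held
 * the seqcount (odd value) can never pass the retry check and is simply
 * re-executed once the writer is done; this guarantees that the
 * cputm/cputm_start pair used for the calculation is consistent.
 */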
2303
2304 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2305 {
2306
2307         gmap_enable(vcpu->arch.enabled_gmap);
2308         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2309         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2310                 __start_cpu_timer_accounting(vcpu);
2311         vcpu->cpu = cpu;
2312 }
2313
2314 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2315 {
2316         vcpu->cpu = -1;
2317         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2318                 __stop_cpu_timer_accounting(vcpu);
2319         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2320         vcpu->arch.enabled_gmap = gmap_get_enabled();
2321         gmap_disable(vcpu->arch.enabled_gmap);
2322
2323 }
2324
2325 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2326 {
2327         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2328         vcpu->arch.sie_block->gpsw.mask = 0UL;
2329         vcpu->arch.sie_block->gpsw.addr = 0UL;
2330         kvm_s390_set_prefix(vcpu, 0);
2331         kvm_s390_set_cpu_timer(vcpu, 0);
2332         vcpu->arch.sie_block->ckc       = 0UL;
2333         vcpu->arch.sie_block->todpr     = 0;
2334         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2335         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2336         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2337         /* make sure the new fpc will be lazily loaded */
2338         save_fpu_regs();
2339         current->thread.fpu.fpc = 0;
2340         vcpu->arch.sie_block->gbea = 1;
2341         vcpu->arch.sie_block->pp = 0;
2342         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2343         kvm_clear_async_pf_completion_queue(vcpu);
2344         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2345                 kvm_s390_vcpu_stop(vcpu);
2346         kvm_s390_clear_local_irqs(vcpu);
2347 }
2348
2349 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2350 {
2351         mutex_lock(&vcpu->kvm->lock);
2352         preempt_disable();
2353         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2354         preempt_enable();
2355         mutex_unlock(&vcpu->kvm->lock);
2356         if (!kvm_is_ucontrol(vcpu->kvm)) {
2357                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2358                 sca_add_vcpu(vcpu);
2359         }
2360         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2361                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2362         /* make vcpu_load load the right gmap on the first trigger */
2363         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2364 }
2365
2366 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2367 {
2368         if (!test_kvm_facility(vcpu->kvm, 76))
2369                 return;
2370
2371         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2372
2373         if (vcpu->kvm->arch.crypto.aes_kw)
2374                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2375         if (vcpu->kvm->arch.crypto.dea_kw)
2376                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2377
2378         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2379 }
2380
2381 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2382 {
2383         free_page(vcpu->arch.sie_block->cbrlo);
2384         vcpu->arch.sie_block->cbrlo = 0;
2385 }
2386
2387 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2388 {
2389         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2390         if (!vcpu->arch.sie_block->cbrlo)
2391                 return -ENOMEM;
2392
2393         vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2394         return 0;
2395 }
2396
2397 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2398 {
2399         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2400
2401         vcpu->arch.sie_block->ibc = model->ibc;
2402         if (test_kvm_facility(vcpu->kvm, 7))
2403                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2404 }
2405
2406 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2407 {
2408         int rc = 0;
2409
2410         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2411                                                     CPUSTAT_SM |
2412                                                     CPUSTAT_STOPPED);
2413
2414         if (test_kvm_facility(vcpu->kvm, 78))
2415                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2416         else if (test_kvm_facility(vcpu->kvm, 8))
2417                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2418
2419         kvm_s390_vcpu_setup_model(vcpu);
2420
2421         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2422         if (MACHINE_HAS_ESOP)
2423                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2424         if (test_kvm_facility(vcpu->kvm, 9))
2425                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2426         if (test_kvm_facility(vcpu->kvm, 73))
2427                 vcpu->arch.sie_block->ecb |= ECB_TE;
2428
2429         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2430                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2431         if (test_kvm_facility(vcpu->kvm, 130))
2432                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2433         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2434         if (sclp.has_cei)
2435                 vcpu->arch.sie_block->eca |= ECA_CEI;
2436         if (sclp.has_ib)
2437                 vcpu->arch.sie_block->eca |= ECA_IB;
2438         if (sclp.has_siif)
2439                 vcpu->arch.sie_block->eca |= ECA_SII;
2440         if (sclp.has_sigpif)
2441                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2442         if (test_kvm_facility(vcpu->kvm, 129)) {
2443                 vcpu->arch.sie_block->eca |= ECA_VX;
2444                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2445         }
2446         if (test_kvm_facility(vcpu->kvm, 139))
2447                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2448
2449         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2450                                         | SDNXC;
2451         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2452
2453         if (sclp.has_kss)
2454                 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2455         else
2456                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2457
2458         if (vcpu->kvm->arch.use_cmma) {
2459                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2460                 if (rc)
2461                         return rc;
2462         }
2463         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2464         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2465
2466         kvm_s390_vcpu_crypto_setup(vcpu);
2467
2468         return rc;
2469 }
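
/*
 * Note on the storage-key setup above: with the keyless-subset facility the
 * vcpu starts in KSS mode, otherwise ISKE/SSKE/RRBE are intercepted via ictl;
 * either way the intent appears to be lazy storage-key handling, so that
 * s390_enable_skey() only has to be done once a guest really uses keys.
 */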
2470
2471 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2472                                       unsigned int id)
2473 {
2474         struct kvm_vcpu *vcpu;
2475         struct sie_page *sie_page;
2476         int rc = -EINVAL;
2477
2478         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2479                 goto out;
2480
2481         rc = -ENOMEM;
2482
2483         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2484         if (!vcpu)
2485                 goto out;
2486
2487         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2488         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2489         if (!sie_page)
2490                 goto out_free_cpu;
2491
2492         vcpu->arch.sie_block = &sie_page->sie_block;
2493         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2494
2495         /* the real guest size will always be smaller than msl */
2496         vcpu->arch.sie_block->mso = 0;
2497         vcpu->arch.sie_block->msl = sclp.hamax;
2498
2499         vcpu->arch.sie_block->icpua = id;
2500         spin_lock_init(&vcpu->arch.local_int.lock);
2501         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2502         vcpu->arch.local_int.wq = &vcpu->wq;
2503         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2504         seqcount_init(&vcpu->arch.cputm_seqcount);
2505
2506         rc = kvm_vcpu_init(vcpu, kvm, id);
2507         if (rc)
2508                 goto out_free_sie_block;
2509         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2510                  vcpu->arch.sie_block);
2511         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2512
2513         return vcpu;
2514 out_free_sie_block:
2515         free_page((unsigned long)(vcpu->arch.sie_block));
2516 out_free_cpu:
2517         kmem_cache_free(kvm_vcpu_cache, vcpu);
2518 out:
2519         return ERR_PTR(rc);
2520 }
2521
2522 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2523 {
2524         return kvm_s390_vcpu_has_irq(vcpu, 0);
2525 }
2526
2527 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2528 {
2529         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2530 }
2531
2532 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2533 {
2534         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2535         exit_sie(vcpu);
2536 }
2537
2538 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2539 {
2540         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2541 }
2542
2543 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2544 {
2545         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2546         exit_sie(vcpu);
2547 }
2548
2549 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2550 {
2551         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2552 }
2553
2554 /*
2555  * Kick a guest cpu out of SIE and wait until SIE is not running.
2556  * If the CPU is not running (e.g. waiting as idle) the function will
2557  * return immediately. */
2558 void exit_sie(struct kvm_vcpu *vcpu)
2559 {
2560         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2561         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2562                 cpu_relax();
2563 }
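
/*
 * Note: CPUSTAT_STOP_INT requests an exit from SIE, and PROG_IN_SIE in prog0c
 * is only set while the cpu is actually executing inside SIE, so the
 * busy-wait above returns as soon as the hardware has left guest context;
 * callers such as kvm_s390_vcpu_block() rely on that guarantee.
 */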
2564
2565 /* Kick a guest cpu out of SIE to process a request synchronously */
2566 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2567 {
2568         kvm_make_request(req, vcpu);
2569         kvm_s390_vcpu_request(vcpu);
2570 }
2571
2572 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2573                               unsigned long end)
2574 {
2575         struct kvm *kvm = gmap->private;
2576         struct kvm_vcpu *vcpu;
2577         unsigned long prefix;
2578         int i;
2579
2580         if (gmap_is_shadow(gmap))
2581                 return;
2582         if (start >= 1UL << 31)
2583                 /* We are only interested in prefix pages */
2584                 return;
2585         kvm_for_each_vcpu(i, vcpu, kvm) {
2586                 /* match against both prefix pages */
2587                 prefix = kvm_s390_get_prefix(vcpu);
2588                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2589                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2590                                    start, end);
2591                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2592                 }
2593         }
2594 }
2595
2596 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2597 {
2598         /* kvm common code refers to this, but never calls it */
2599         BUG();
2600         return 0;
2601 }
2602
2603 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2604                                            struct kvm_one_reg *reg)
2605 {
2606         int r = -EINVAL;
2607
2608         switch (reg->id) {
2609         case KVM_REG_S390_TODPR:
2610                 r = put_user(vcpu->arch.sie_block->todpr,
2611                              (u32 __user *)reg->addr);
2612                 break;
2613         case KVM_REG_S390_EPOCHDIFF:
2614                 r = put_user(vcpu->arch.sie_block->epoch,
2615                              (u64 __user *)reg->addr);
2616                 break;
2617         case KVM_REG_S390_CPU_TIMER:
2618                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2619                              (u64 __user *)reg->addr);
2620                 break;
2621         case KVM_REG_S390_CLOCK_COMP:
2622                 r = put_user(vcpu->arch.sie_block->ckc,
2623                              (u64 __user *)reg->addr);
2624                 break;
2625         case KVM_REG_S390_PFTOKEN:
2626                 r = put_user(vcpu->arch.pfault_token,
2627                              (u64 __user *)reg->addr);
2628                 break;
2629         case KVM_REG_S390_PFCOMPARE:
2630                 r = put_user(vcpu->arch.pfault_compare,
2631                              (u64 __user *)reg->addr);
2632                 break;
2633         case KVM_REG_S390_PFSELECT:
2634                 r = put_user(vcpu->arch.pfault_select,
2635                              (u64 __user *)reg->addr);
2636                 break;
2637         case KVM_REG_S390_PP:
2638                 r = put_user(vcpu->arch.sie_block->pp,
2639                              (u64 __user *)reg->addr);
2640                 break;
2641         case KVM_REG_S390_GBEA:
2642                 r = put_user(vcpu->arch.sie_block->gbea,
2643                              (u64 __user *)reg->addr);
2644                 break;
2645         default:
2646                 break;
2647         }
2648
2649         return r;
2650 }
2651
2652 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2653                                            struct kvm_one_reg *reg)
2654 {
2655         int r = -EINVAL;
2656         __u64 val;
2657
2658         switch (reg->id) {
2659         case KVM_REG_S390_TODPR:
2660                 r = get_user(vcpu->arch.sie_block->todpr,
2661                              (u32 __user *)reg->addr);
2662                 break;
2663         case KVM_REG_S390_EPOCHDIFF:
2664                 r = get_user(vcpu->arch.sie_block->epoch,
2665                              (u64 __user *)reg->addr);
2666                 break;
2667         case KVM_REG_S390_CPU_TIMER:
2668                 r = get_user(val, (u64 __user *)reg->addr);
2669                 if (!r)
2670                         kvm_s390_set_cpu_timer(vcpu, val);
2671                 break;
2672         case KVM_REG_S390_CLOCK_COMP:
2673                 r = get_user(vcpu->arch.sie_block->ckc,
2674                              (u64 __user *)reg->addr);
2675                 break;
2676         case KVM_REG_S390_PFTOKEN:
2677                 r = get_user(vcpu->arch.pfault_token,
2678                              (u64 __user *)reg->addr);
2679                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2680                         kvm_clear_async_pf_completion_queue(vcpu);
2681                 break;
2682         case KVM_REG_S390_PFCOMPARE:
2683                 r = get_user(vcpu->arch.pfault_compare,
2684                              (u64 __user *)reg->addr);
2685                 break;
2686         case KVM_REG_S390_PFSELECT:
2687                 r = get_user(vcpu->arch.pfault_select,
2688                              (u64 __user *)reg->addr);
2689                 break;
2690         case KVM_REG_S390_PP:
2691                 r = get_user(vcpu->arch.sie_block->pp,
2692                              (u64 __user *)reg->addr);
2693                 break;
2694         case KVM_REG_S390_GBEA:
2695                 r = get_user(vcpu->arch.sie_block->gbea,
2696                              (u64 __user *)reg->addr);
2697                 break;
2698         default:
2699                 break;
2700         }
2701
2702         return r;
2703 }
2704
2705 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2706 {
2707         kvm_s390_vcpu_initial_reset(vcpu);
2708         return 0;
2709 }
2710
2711 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2712 {
2713         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2714         return 0;
2715 }
2716
2717 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2718 {
2719         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2720         return 0;
2721 }
2722
2723 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2724                                   struct kvm_sregs *sregs)
2725 {
2726         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2727         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2728         return 0;
2729 }
2730
2731 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2732                                   struct kvm_sregs *sregs)
2733 {
2734         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2735         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2736         return 0;
2737 }
2738
2739 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2740 {
2741         if (test_fp_ctl(fpu->fpc))
2742                 return -EINVAL;
2743         vcpu->run->s.regs.fpc = fpu->fpc;
2744         if (MACHINE_HAS_VX)
2745                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2746                                  (freg_t *) fpu->fprs);
2747         else
2748                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2749         return 0;
2750 }
2751
2752 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2753 {
2754         /* make sure we have the latest values */
2755         save_fpu_regs();
2756         if (MACHINE_HAS_VX)
2757                 convert_vx_to_fp((freg_t *) fpu->fprs,
2758                                  (__vector128 *) vcpu->run->s.regs.vrs);
2759         else
2760                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2761         fpu->fpc = vcpu->run->s.regs.fpc;
2762         return 0;
2763 }
2764
2765 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2766 {
2767         int rc = 0;
2768
2769         if (!is_vcpu_stopped(vcpu))
2770                 rc = -EBUSY;
2771         else {
2772                 vcpu->run->psw_mask = psw.mask;
2773                 vcpu->run->psw_addr = psw.addr;
2774         }
2775         return rc;
2776 }
2777
2778 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2779                                   struct kvm_translation *tr)
2780 {
2781         return -EINVAL; /* not implemented yet */
2782 }
2783
2784 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2785                               KVM_GUESTDBG_USE_HW_BP | \
2786                               KVM_GUESTDBG_ENABLE)
2787
2788 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2789                                         struct kvm_guest_debug *dbg)
2790 {
2791         int rc = 0;
2792
2793         vcpu->guest_debug = 0;
2794         kvm_s390_clear_bp_data(vcpu);
2795
2796         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2797                 return -EINVAL;
2798         if (!sclp.has_gpere)
2799                 return -EINVAL;
2800
2801         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2802                 vcpu->guest_debug = dbg->control;
2803                 /* enforce guest PER */
2804                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2805
2806                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2807                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2808         } else {
2809                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2810                 vcpu->arch.guestdbg.last_bp = 0;
2811         }
2812
2813         if (rc) {
2814                 vcpu->guest_debug = 0;
2815                 kvm_s390_clear_bp_data(vcpu);
2816                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2817         }
2818
2819         return rc;
2820 }
2821
2822 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2823                                     struct kvm_mp_state *mp_state)
2824 {
2825         /* CHECK_STOP and LOAD are not supported yet */
2826         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2827                                        KVM_MP_STATE_OPERATING;
2828 }
2829
2830 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2831                                     struct kvm_mp_state *mp_state)
2832 {
2833         int rc = 0;
2834
2835         /* user space knows about this interface - let it control the state */
2836         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2837
2838         switch (mp_state->mp_state) {
2839         case KVM_MP_STATE_STOPPED:
2840                 kvm_s390_vcpu_stop(vcpu);
2841                 break;
2842         case KVM_MP_STATE_OPERATING:
2843                 kvm_s390_vcpu_start(vcpu);
2844                 break;
2845         case KVM_MP_STATE_LOAD:
2846         case KVM_MP_STATE_CHECK_STOP:
2847                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2848         default:
2849                 rc = -ENXIO;
2850         }
2851
2852         return rc;
2853 }
2854
2855 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2856 {
2857         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2858 }
2859
2860 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2861 {
2862 retry:
2863         kvm_s390_vcpu_request_handled(vcpu);
2864         if (!kvm_request_pending(vcpu))
2865                 return 0;
2866         /*
2867          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2868          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2869          * This ensures that the ipte instruction for this request has
2870          * already finished. We might race against a second unmapper that
2871          * wants to set the blocking bit. Let's just retry the request loop.
2872          */
2873         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2874                 int rc;
2875                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2876                                           kvm_s390_get_prefix(vcpu),
2877                                           PAGE_SIZE * 2, PROT_WRITE);
2878                 if (rc) {
2879                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2880                         return rc;
2881                 }
2882                 goto retry;
2883         }
2884
2885         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2886                 vcpu->arch.sie_block->ihcpu = 0xffff;
2887                 goto retry;
2888         }
2889
2890         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2891                 if (!ibs_enabled(vcpu)) {
2892                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2893                         atomic_or(CPUSTAT_IBS,
2894                                         &vcpu->arch.sie_block->cpuflags);
2895                 }
2896                 goto retry;
2897         }
2898
2899         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2900                 if (ibs_enabled(vcpu)) {
2901                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2902                         atomic_andnot(CPUSTAT_IBS,
2903                                           &vcpu->arch.sie_block->cpuflags);
2904                 }
2905                 goto retry;
2906         }
2907
2908         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2909                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2910                 goto retry;
2911         }
2912
2913         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2914                 /*
2915                  * Disable CMMA virtualization; we will emulate the ESSA
2916                  * instruction manually, in order to provide the additional
2917                  * functionality needed for live migration.
2918                  */
2919                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2920                 goto retry;
2921         }
2922
2923         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2924                 /*
2925                  * Re-enable CMMA virtualization if CMMA is available and
2926                  * was used.
2927                  */
2928                 if ((vcpu->kvm->arch.use_cmma) &&
2929                     (vcpu->kvm->mm->context.use_cmma))
2930                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2931                 goto retry;
2932         }
2933
2934         /* nothing to do, just clear the request */
2935         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2936
2937         return 0;
2938 }
2939
2940 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2941                                  const struct kvm_s390_vm_tod_clock *gtod)
2942 {
2943         struct kvm_vcpu *vcpu;
2944         struct kvm_s390_tod_clock_ext htod;
2945         int i;
2946
2947         mutex_lock(&kvm->lock);
2948         preempt_disable();
2949
2950         get_tod_clock_ext((char *)&htod);
2951
2952         kvm->arch.epoch = gtod->tod - htod.tod;
2953         kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2954
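        /*
         * The 64-bit subtraction above wraps if the host TOD is larger than
         * the requested guest TOD; in that case borrow one from the epoch
         * index so that (epdx, epoch) remains a consistent offset.
         */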
2955         if (kvm->arch.epoch > gtod->tod)
2956                 kvm->arch.epdx -= 1;
2957
2958         kvm_s390_vcpu_block_all(kvm);
2959         kvm_for_each_vcpu(i, vcpu, kvm) {
2960                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2961                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
2962         }
2963
2964         kvm_s390_vcpu_unblock_all(kvm);
2965         preempt_enable();
2966         mutex_unlock(&kvm->lock);
2967 }
2968
2969 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2970 {
2971         struct kvm_vcpu *vcpu;
2972         int i;
2973
2974         mutex_lock(&kvm->lock);
2975         preempt_disable();
2976         kvm->arch.epoch = tod - get_tod_clock();
2977         kvm_s390_vcpu_block_all(kvm);
2978         kvm_for_each_vcpu(i, vcpu, kvm)
2979                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2980         kvm_s390_vcpu_unblock_all(kvm);
2981         preempt_enable();
2982         mutex_unlock(&kvm->lock);
2983 }
2984
2985 /**
2986  * kvm_arch_fault_in_page - fault-in guest page if necessary
2987  * @vcpu: The corresponding virtual cpu
2988  * @gpa: Guest physical address
2989  * @writable: Whether the page should be writable or not
2990  *
2991  * Make sure that a guest page has been faulted-in on the host.
2992  *
2993  * Return: Zero on success, negative error code otherwise.
2994  */
2995 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2996 {
2997         return gmap_fault(vcpu->arch.gmap, gpa,
2998                           writable ? FAULT_FLAG_WRITE : 0);
2999 }
3000
3001 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3002                                       unsigned long token)
3003 {
3004         struct kvm_s390_interrupt inti;
3005         struct kvm_s390_irq irq;
3006
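        /*
         * PFAULT_INIT is made pending on the faulting vcpu itself, while
         * PFAULT_DONE is injected as a floating interrupt for the whole VM
         * via kvm_s390_inject_vm().
         */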
3007         if (start_token) {
3008                 irq.u.ext.ext_params2 = token;
3009                 irq.type = KVM_S390_INT_PFAULT_INIT;
3010                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3011         } else {
3012                 inti.type = KVM_S390_INT_PFAULT_DONE;
3013                 inti.parm64 = token;
3014                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3015         }
3016 }
3017
3018 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3019                                      struct kvm_async_pf *work)
3020 {
3021         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3022         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3023 }
3024
3025 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3026                                  struct kvm_async_pf *work)
3027 {
3028         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3029         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3030 }
3031
3032 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3033                                struct kvm_async_pf *work)
3034 {
3035         /* s390 will always inject the page directly */
3036 }
3037
3038 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3039 {
3040         /*
3041          * s390 will always inject the page directly,
3042          * but we still want check_async_completion to clean up
3043          */
3044         return true;
3045 }
3046
3047 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3048 {
3049         hva_t hva;
3050         struct kvm_arch_async_pf arch;
3051         int rc;
3052
3053         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3054                 return 0;
3055         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3056             vcpu->arch.pfault_compare)
3057                 return 0;
3058         if (psw_extint_disabled(vcpu))
3059                 return 0;
3060         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3061                 return 0;
3062         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3063                 return 0;
3064         if (!vcpu->arch.gmap->pfault_enabled)
3065                 return 0;
3066
3067         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3068         hva += current->thread.gmap_addr & ~PAGE_MASK;
3069         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3070                 return 0;
3071
3072         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3073         return rc;
3074 }
3075
3076 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3077 {
3078         int rc, cpuflags;
3079
3080         /*
3081          * On s390, notifications for arriving pages will be delivered directly
3082          * to the guest, but the housekeeping for completed pfaults is
3083          * handled outside the worker.
3084          */
3085         kvm_check_async_pf_completion(vcpu);
3086
3087         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3088         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3089
3090         if (need_resched())
3091                 schedule();
3092
3093         if (test_cpu_flag(CIF_MCCK_PENDING))
3094                 s390_handle_mcck();
3095
3096         if (!kvm_is_ucontrol(vcpu->kvm)) {
3097                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3098                 if (rc)
3099                         return rc;
3100         }
3101
3102         rc = kvm_s390_handle_requests(vcpu);
3103         if (rc)
3104                 return rc;
3105
3106         if (guestdbg_enabled(vcpu)) {
3107                 kvm_s390_backup_guest_per_regs(vcpu);
3108                 kvm_s390_patch_guest_per_regs(vcpu);
3109         }
3110
3111         vcpu->arch.sie_block->icptcode = 0;
3112         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3113         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3114         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3115
3116         return 0;
3117 }
3118
3119 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3120 {
3121         struct kvm_s390_pgm_info pgm_info = {
3122                 .code = PGM_ADDRESSING,
3123         };
3124         u8 opcode, ilen;
3125         int rc;
3126
3127         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3128         trace_kvm_s390_sie_fault(vcpu);
3129
3130         /*
3131          * We want to inject an addressing exception, which is defined as a
3132          * suppressing or terminating exception. However, as we came here
3133          * via a DAT access exception, the PSW still points to the faulting
3134          * instruction, since DAT exceptions are nullifying. So we've got
3135          * to look up the current opcode to get the length of the instruction
3136          * to be able to forward the PSW.
3137          */
3138         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3139         ilen = insn_length(opcode);
3140         if (rc < 0) {
3141                 return rc;
3142         } else if (rc) {
3143                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3144                  * Forward by arbitrary ilc, injection will take care of
3145                  * nullification if necessary.
3146                  */
3147                 pgm_info = vcpu->arch.pgm;
3148                 ilen = 4;
3149         }
3150         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3151         kvm_s390_forward_psw(vcpu, ilen);
3152         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3153 }
3154
3155 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3156 {
3157         struct mcck_volatile_info *mcck_info;
3158         struct sie_page *sie_page;
3159
3160         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3161                    vcpu->arch.sie_block->icptcode);
3162         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3163
3164         if (guestdbg_enabled(vcpu))
3165                 kvm_s390_restore_guest_per_regs(vcpu);
3166
3167         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3168         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3169
3170         if (exit_reason == -EINTR) {
3171                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3172                 sie_page = container_of(vcpu->arch.sie_block,
3173                                         struct sie_page, sie_block);
3174                 mcck_info = &sie_page->mcck_info;
3175                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3176                 return 0;
3177         }
3178
3179         if (vcpu->arch.sie_block->icptcode > 0) {
3180                 int rc = kvm_handle_sie_intercept(vcpu);
3181
3182                 if (rc != -EOPNOTSUPP)
3183                         return rc;
3184                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3185                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3186                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3187                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3188                 return -EREMOTE;
3189         } else if (exit_reason != -EFAULT) {
3190                 vcpu->stat.exit_null++;
3191                 return 0;
3192         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3193                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3194                 vcpu->run->s390_ucontrol.trans_exc_code =
3195                                                 current->thread.gmap_addr;
3196                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3197                 return -EREMOTE;
3198         } else if (current->thread.gmap_pfault) {
3199                 trace_kvm_s390_major_guest_pfault(vcpu);
3200                 current->thread.gmap_pfault = 0;
3201                 if (kvm_arch_setup_async_pf(vcpu))
3202                         return 0;
3203                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3204         }
3205         return vcpu_post_run_fault_in_sie(vcpu);
3206 }
3207
3208 static int __vcpu_run(struct kvm_vcpu *vcpu)
3209 {
3210         int rc, exit_reason;
3211
3212         /*
3213          * We try to hold kvm->srcu during most of vcpu_run (except when
3214          * running the guest), so that memslots (and other stuff) are protected.
3215          */
3216         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3217
3218         do {
3219                 rc = vcpu_pre_run(vcpu);
3220                 if (rc)
3221                         break;
3222
3223                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3224                 /*
3225                  * As PF_VCPU will be used in the fault handler, there must be
3226                  * no uaccess between guest_enter and guest_exit.
3227                  */
3228                 local_irq_disable();
3229                 guest_enter_irqoff();
3230                 __disable_cpu_timer_accounting(vcpu);
3231                 local_irq_enable();
3232                 exit_reason = sie64a(vcpu->arch.sie_block,
3233                                      vcpu->run->s.regs.gprs);
3234                 local_irq_disable();
3235                 __enable_cpu_timer_accounting(vcpu);
3236                 guest_exit_irqoff();
3237                 local_irq_enable();
3238                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3239
3240                 rc = vcpu_post_run(vcpu, exit_reason);
3241         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3242
3243         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3244         return rc;
3245 }
3246
3247 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3248 {
3249         struct runtime_instr_cb *riccb;
3250         struct gs_cb *gscb;
3251
3252         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3253         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3254         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3255         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3256         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3257                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3258         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3259                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3260                 /* some control register changes require a tlb flush */
3261                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3262         }
3263         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3264                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3265                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3266                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3267                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3268                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3269         }
3270         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3271                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3272                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3273                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3274                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3275                         kvm_clear_async_pf_completion_queue(vcpu);
3276         }
3277         /*
3278          * If userspace sets the riccb (e.g. after migration) to a valid state,
3279          * we should enable RI here instead of doing the lazy enablement.
3280          */
3281         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3282             test_kvm_facility(vcpu->kvm, 64) &&
3283             riccb->v &&
3284             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3285                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3286                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3287         }
3288         /*
3289          * If userspace sets the gscb (e.g. after migration) to non-zero,
3290          * we should enable GS here instead of doing the lazy enablement.
3291          */
3292         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3293             test_kvm_facility(vcpu->kvm, 133) &&
3294             gscb->gssm &&
3295             !vcpu->arch.gs_enabled) {
3296                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3297                 vcpu->arch.sie_block->ecb |= ECB_GS;
3298                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3299                 vcpu->arch.gs_enabled = 1;
3300         }
3301         save_access_regs(vcpu->arch.host_acrs);
3302         restore_access_regs(vcpu->run->s.regs.acrs);
3303         /* save host (userspace) fprs/vrs */
3304         save_fpu_regs();
3305         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3306         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3307         if (MACHINE_HAS_VX)
3308                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3309         else
3310                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3311         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3312         if (test_fp_ctl(current->thread.fpu.fpc))
3313                 /* User space provided an invalid FPC, let's clear it */
3314                 current->thread.fpu.fpc = 0;
3315         if (MACHINE_HAS_GS) {
3316                 preempt_disable();
3317                 __ctl_set_bit(2, 4);
3318                 if (current->thread.gs_cb) {
3319                         vcpu->arch.host_gscb = current->thread.gs_cb;
3320                         save_gs_cb(vcpu->arch.host_gscb);
3321                 }
3322                 if (vcpu->arch.gs_enabled) {
3323                         current->thread.gs_cb = (struct gs_cb *)
3324                                                 &vcpu->run->s.regs.gscb;
3325                         restore_gs_cb(current->thread.gs_cb);
3326                 }
3327                 preempt_enable();
3328         }
3329
3330         kvm_run->kvm_dirty_regs = 0;
3331 }
3332
3333 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3334 {
3335         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3336         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3337         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3338         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3339         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3340         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3341         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3342         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3343         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3344         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3345         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3346         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3347         save_access_regs(vcpu->run->s.regs.acrs);
3348         restore_access_regs(vcpu->arch.host_acrs);
3349         /* Save guest register state */
3350         save_fpu_regs();
3351         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3352         /* Restore will be done lazily at return */
3353         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3354         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3355         if (MACHINE_HAS_GS) {
3356                 __ctl_set_bit(2, 4);
3357                 if (vcpu->arch.gs_enabled)
3358                         save_gs_cb(current->thread.gs_cb);
3359                 preempt_disable();
3360                 current->thread.gs_cb = vcpu->arch.host_gscb;
3361                 restore_gs_cb(vcpu->arch.host_gscb);
3362                 preempt_enable();
3363                 if (!vcpu->arch.host_gscb)
3364                         __ctl_clear_bit(2, 4);
3365                 vcpu->arch.host_gscb = NULL;
3366         }
3367
3368 }
3369
3370 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3371 {
3372         int rc;
3373
3374         if (kvm_run->immediate_exit)
3375                 return -EINTR;
3376
3377         if (guestdbg_exit_pending(vcpu)) {
3378                 kvm_s390_prepare_debug_exit(vcpu);
3379                 return 0;
3380         }
3381
3382         kvm_sigset_activate(vcpu);
3383
3384         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3385                 kvm_s390_vcpu_start(vcpu);
3386         } else if (is_vcpu_stopped(vcpu)) {
3387                 pr_err_ratelimited("can't run stopped vcpu %d\n",
3388                                    vcpu->vcpu_id);
3389                 return -EINVAL;
3390         }
3391
3392         sync_regs(vcpu, kvm_run);
3393         enable_cpu_timer_accounting(vcpu);
3394
3395         might_fault();
3396         rc = __vcpu_run(vcpu);
3397
3398         if (signal_pending(current) && !rc) {
3399                 kvm_run->exit_reason = KVM_EXIT_INTR;
3400                 rc = -EINTR;
3401         }
3402
3403         if (guestdbg_exit_pending(vcpu) && !rc)  {
3404                 kvm_s390_prepare_debug_exit(vcpu);
3405                 rc = 0;
3406         }
3407
3408         if (rc == -EREMOTE) {
3409                 /* userspace support is needed, kvm_run has been prepared */
3410                 rc = 0;
3411         }
3412
3413         disable_cpu_timer_accounting(vcpu);
3414         store_regs(vcpu, kvm_run);
3415
3416         kvm_sigset_deactivate(vcpu);
3417
3418         vcpu->stat.exit_userspace++;
3419         return rc;
3420 }
3421
3422 /*
3423  * store status at address
3424  * we have two special cases:
3425  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3426  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3427  */
3428 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3429 {
3430         unsigned char archmode = 1;
3431         freg_t fprs[NUM_FPRS];
3432         unsigned int px;
3433         u64 clkcomp, cputm;
3434         int rc;
3435
3436         px = kvm_s390_get_prefix(vcpu);
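        /*
         * The register save area has the same layout as the lowcore from
         * __LC_FPREGS_SAVE_AREA onwards, so gpa is set up such that the
         * __LC_* offsets used below hit the right fields in all three cases.
         */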
3437         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3438                 if (write_guest_abs(vcpu, 163, &archmode, 1))
3439                         return -EFAULT;
3440                 gpa = 0;
3441         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3442                 if (write_guest_real(vcpu, 163, &archmode, 1))
3443                         return -EFAULT;
3444                 gpa = px;
3445         } else
3446                 gpa -= __LC_FPREGS_SAVE_AREA;
3447
3448         /* manually convert vector registers if necessary */
3449         if (MACHINE_HAS_VX) {
3450                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3451                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3452                                      fprs, 128);
3453         } else {
3454                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3455                                      vcpu->run->s.regs.fprs, 128);
3456         }
3457         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3458                               vcpu->run->s.regs.gprs, 128);
3459         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3460                               &vcpu->arch.sie_block->gpsw, 16);
3461         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3462                               &px, 4);
3463         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3464                               &vcpu->run->s.regs.fpc, 4);
3465         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3466                               &vcpu->arch.sie_block->todpr, 4);
3467         cputm = kvm_s390_get_cpu_timer(vcpu);
3468         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3469                               &cputm, 8);
3470         clkcomp = vcpu->arch.sie_block->ckc >> 8;
3471         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3472                               &clkcomp, 8);
3473         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3474                               &vcpu->run->s.regs.acrs, 64);
3475         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3476                               &vcpu->arch.sie_block->gcr, 128);
3477         return rc ? -EFAULT : 0;
3478 }
3479
3480 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3481 {
3482         /*
3483          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3484          * switch in the run ioctl. Let's update our copies before we save
3485          * them into the save area.
3486          */
3487         save_fpu_regs();
3488         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3489         save_access_regs(vcpu->run->s.regs.acrs);
3490
3491         return kvm_s390_store_status_unloaded(vcpu, addr);
3492 }
3493
3494 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3495 {
3496         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3497         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3498 }
3499
3500 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3501 {
3502         unsigned int i;
3503         struct kvm_vcpu *vcpu;
3504
3505         kvm_for_each_vcpu(i, vcpu, kvm) {
3506                 __disable_ibs_on_vcpu(vcpu);
3507         }
3508 }
3509
3510 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3511 {
3512         if (!sclp.has_ibs)
3513                 return;
3514         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3515         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3516 }
3517
3518 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3519 {
3520         int i, online_vcpus, started_vcpus = 0;
3521
3522         if (!is_vcpu_stopped(vcpu))
3523                 return;
3524
3525         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3526         /* Only one cpu at a time may enter/leave the STOPPED state. */
3527         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3528         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3529
3530         for (i = 0; i < online_vcpus; i++) {
3531                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3532                         started_vcpus++;
3533         }
3534
3535         if (started_vcpus == 0) {
3536                 /* we're the only active VCPU -> speed it up */
3537                 __enable_ibs_on_vcpu(vcpu);
3538         } else if (started_vcpus == 1) {
3539                 /*
3540                  * As we are starting a second VCPU, we have to disable
3541                  * the IBS facility on all VCPUs to remove potentially
3542                  * outstanding ENABLE requests.
3543                  */
3544                 __disable_ibs_on_all_vcpus(vcpu->kvm);
3545         }
3546
3547         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3548         /*
3549          * Another VCPU might have used IBS while we were offline.
3550          * Let's play safe and flush the VCPU at startup.
3551          */
3552         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3553         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3554         return;
3555 }
3556
3557 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3558 {
3559         int i, online_vcpus, started_vcpus = 0;
3560         struct kvm_vcpu *started_vcpu = NULL;
3561
3562         if (is_vcpu_stopped(vcpu))
3563                 return;
3564
3565         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3566         /* Only one cpu at a time may enter/leave the STOPPED state. */
3567         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3568         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3569
3570         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3571         kvm_s390_clear_stop_irq(vcpu);
3572
3573         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3574         __disable_ibs_on_vcpu(vcpu);
3575
3576         for (i = 0; i < online_vcpus; i++) {
3577                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3578                         started_vcpus++;
3579                         started_vcpu = vcpu->kvm->vcpus[i];
3580                 }
3581         }
3582
3583         if (started_vcpus == 1) {
3584                 /*
3585                  * As we only have one VCPU left, we want to enable the
3586                  * IBS facility for that VCPU to speed it up.
3587                  */
3588                 __enable_ibs_on_vcpu(started_vcpu);
3589         }
3590
3591         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3592         return;
3593 }
3594
3595 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3596                                      struct kvm_enable_cap *cap)
3597 {
3598         int r;
3599
3600         if (cap->flags)
3601                 return -EINVAL;
3602
3603         switch (cap->cap) {
3604         case KVM_CAP_S390_CSS_SUPPORT:
3605                 if (!vcpu->kvm->arch.css_support) {
3606                         vcpu->kvm->arch.css_support = 1;
3607                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3608                         trace_kvm_s390_enable_css(vcpu->kvm);
3609                 }
3610                 r = 0;
3611                 break;
3612         default:
3613                 r = -EINVAL;
3614                 break;
3615         }
3616         return r;
3617 }
3618
3619 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3620                                   struct kvm_s390_mem_op *mop)
3621 {
3622         void __user *uaddr = (void __user *)mop->buf;
3623         void *tmpbuf = NULL;
3624         int r, srcu_idx;
3625         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3626                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3627
3628         if (mop->flags & ~supported_flags)
3629                 return -EINVAL;
3630
3631         if (mop->size > MEM_OP_MAX_SIZE)
3632                 return -E2BIG;
3633
3634         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3635                 tmpbuf = vmalloc(mop->size);
3636                 if (!tmpbuf)
3637                         return -ENOMEM;
3638         }
3639
3640         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3641
3642         switch (mop->op) {
3643         case KVM_S390_MEMOP_LOGICAL_READ:
3644                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3645                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3646                                             mop->size, GACC_FETCH);
3647                         break;
3648                 }
3649                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3650                 if (r == 0) {
3651                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3652                                 r = -EFAULT;
3653                 }
3654                 break;
3655         case KVM_S390_MEMOP_LOGICAL_WRITE:
3656                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3657                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3658                                             mop->size, GACC_STORE);
3659                         break;
3660                 }
3661                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3662                         r = -EFAULT;
3663                         break;
3664                 }
3665                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3666                 break;
3667         default:
3668                 r = -EINVAL;
3669         }
3670
3671         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3672
3673         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3674                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3675
3676         vfree(tmpbuf);
3677         return r;
3678 }
3679
3680 long kvm_arch_vcpu_ioctl(struct file *filp,
3681                          unsigned int ioctl, unsigned long arg)
3682 {
3683         struct kvm_vcpu *vcpu = filp->private_data;
3684         void __user *argp = (void __user *)arg;
3685         int idx;
3686         long r;
3687
3688         switch (ioctl) {
3689         case KVM_S390_IRQ: {
3690                 struct kvm_s390_irq s390irq;
3691
3692                 r = -EFAULT;
3693                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3694                         break;
3695                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3696                 break;
3697         }
3698         case KVM_S390_INTERRUPT: {
3699                 struct kvm_s390_interrupt s390int;
3700                 struct kvm_s390_irq s390irq;
3701
3702                 r = -EFAULT;
3703                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3704                         break;
3705                 if (s390int_to_s390irq(&s390int, &s390irq))
3706                         return -EINVAL;
3707                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3708                 break;
3709         }
3710         case KVM_S390_STORE_STATUS:
3711                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3712                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3713                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3714                 break;
3715         case KVM_S390_SET_INITIAL_PSW: {
3716                 psw_t psw;
3717
3718                 r = -EFAULT;
3719                 if (copy_from_user(&psw, argp, sizeof(psw)))
3720                         break;
3721                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3722                 break;
3723         }
3724         case KVM_S390_INITIAL_RESET:
3725                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3726                 break;
3727         case KVM_SET_ONE_REG:
3728         case KVM_GET_ONE_REG: {
3729                 struct kvm_one_reg reg;
3730                 r = -EFAULT;
3731                 if (copy_from_user(&reg, argp, sizeof(reg)))
3732                         break;
3733                 if (ioctl == KVM_SET_ONE_REG)
3734                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3735                 else
3736                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3737                 break;
3738         }
3739 #ifdef CONFIG_KVM_S390_UCONTROL
3740         case KVM_S390_UCAS_MAP: {
3741                 struct kvm_s390_ucas_mapping ucasmap;
3742
3743                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3744                         r = -EFAULT;
3745                         break;
3746                 }
3747
3748                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3749                         r = -EINVAL;
3750                         break;
3751                 }
3752
3753                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3754                                      ucasmap.vcpu_addr, ucasmap.length);
3755                 break;
3756         }
3757         case KVM_S390_UCAS_UNMAP: {
3758                 struct kvm_s390_ucas_mapping ucasmap;
3759
3760                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3761                         r = -EFAULT;
3762                         break;
3763                 }
3764
3765                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3766                         r = -EINVAL;
3767                         break;
3768                 }
3769
3770                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3771                         ucasmap.length);
3772                 break;
3773         }
3774 #endif
3775         case KVM_S390_VCPU_FAULT: {
3776                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3777                 break;
3778         }
3779         case KVM_ENABLE_CAP:
3780         {
3781                 struct kvm_enable_cap cap;
3782                 r = -EFAULT;
3783                 if (copy_from_user(&cap, argp, sizeof(cap)))
3784                         break;
3785                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3786                 break;
3787         }
3788         case KVM_S390_MEM_OP: {
3789                 struct kvm_s390_mem_op mem_op;
3790
3791                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3792                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3793                 else
3794                         r = -EFAULT;
3795                 break;
3796         }
3797         case KVM_S390_SET_IRQ_STATE: {
3798                 struct kvm_s390_irq_state irq_state;
3799
3800                 r = -EFAULT;
3801                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3802                         break;
3803                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3804                     irq_state.len == 0 ||
3805                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3806                         r = -EINVAL;
3807                         break;
3808                 }
3809                 /* do not use irq_state.flags, it will break old QEMUs */
3810                 r = kvm_s390_set_irq_state(vcpu,
3811                                            (void __user *) irq_state.buf,
3812                                            irq_state.len);
3813                 break;
3814         }
3815         case KVM_S390_GET_IRQ_STATE: {
3816                 struct kvm_s390_irq_state irq_state;
3817
3818                 r = -EFAULT;
3819                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3820                         break;
3821                 if (irq_state.len == 0) {
3822                         r = -EINVAL;
3823                         break;
3824                 }
3825                 /* do not use irq_state.flags, it will break old QEMUs */
3826                 r = kvm_s390_get_irq_state(vcpu,
3827                                            (__u8 __user *)  irq_state.buf,
3828                                            irq_state.len);
3829                 break;
3830         }
3831         default:
3832                 r = -ENOTTY;
3833         }
3834         return r;
3835 }
3836
3837 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3838 {
3839 #ifdef CONFIG_KVM_S390_UCONTROL
3840         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3841                  && (kvm_is_ucontrol(vcpu->kvm))) {
3842                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3843                 get_page(vmf->page);
3844                 return 0;
3845         }
3846 #endif
3847         return VM_FAULT_SIGBUS;
3848 }
3849
3850 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3851                             unsigned long npages)
3852 {
3853         return 0;
3854 }
3855
3856 /* Section: memory related */
3857 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3858                                    struct kvm_memory_slot *memslot,
3859                                    const struct kvm_userspace_memory_region *mem,
3860                                    enum kvm_mr_change change)
3861 {
3862         /* A few sanity checks. Memory slots have to start and end at a
3863            segment boundary (1MB). The memory in userland may be fragmented
3864            into various different vmas. It is okay to mmap() and munmap()
3865            parts of this slot at any time after this call */
3866
3867         if (mem->userspace_addr & 0xffffful)
3868                 return -EINVAL;
3869
3870         if (mem->memory_size & 0xffffful)
3871                 return -EINVAL;
3872
3873         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3874                 return -EINVAL;
3875
3876         return 0;
3877 }
3878
3879 void kvm_arch_commit_memory_region(struct kvm *kvm,
3880                                 const struct kvm_userspace_memory_region *mem,
3881                                 const struct kvm_memory_slot *old,
3882                                 const struct kvm_memory_slot *new,
3883                                 enum kvm_mr_change change)
3884 {
3885         int rc;
3886
3887         /* If the basics of the memslot do not change, we do not want
3888          * to update the gmap. Every update causes several unnecessary
3889          * segment translation exceptions. This is usually handled just
3890          * fine by the normal fault handler + gmap, but it will also
3891          * cause faults on the prefix page of running guest CPUs.
3892          */
3893         if (old->userspace_addr == mem->userspace_addr &&
3894             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3895             old->npages * PAGE_SIZE == mem->memory_size)
3896                 return;
3897
3898         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3899                 mem->guest_phys_addr, mem->memory_size);
3900         if (rc)
3901                 pr_warn("failed to commit memory region\n");
3902         return;
3903 }
3904
3905 static inline unsigned long nonhyp_mask(int i)
3906 {
3907         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3908
3909         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3910 }
3911
3912 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3913 {
3914         vcpu->valid_wakeup = false;
3915 }
3916
3917 static int __init kvm_s390_init(void)
3918 {
3919         int i;
3920
3921         if (!sclp.has_sief2) {
3922                 pr_info("SIE not available\n");
3923                 return -ENODEV;
3924         }
3925
3926         for (i = 0; i < 16; i++)
3927                 kvm_s390_fac_list_mask[i] |=
3928                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3929
3930         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3931 }
3932
3933 static void __exit kvm_s390_exit(void)
3934 {
3935         kvm_exit();
3936 }
3937
3938 module_init(kvm_s390_init);
3939 module_exit(kvm_s390_exit);
3940
3941 /*
3942  * Enable autoloading of the kvm module.
3943  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3944  * since x86 takes a different approach.
3945  */
3946 #include <linux/miscdevice.h>
3947 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3948 MODULE_ALIAS("devname:kvm");