Merge tag 'trace-v4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt...
[sfrench/cifs-2.6.git] / arch / s390 / kvm / kvm-s390.c
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include "kvm-s390.h"
47 #include "gaccess.h"
48
49 #define KMSG_COMPONENT "kvm-s390"
50 #undef pr_fmt
51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52
53 #define CREATE_TRACE_POINTS
54 #include "trace.h"
55 #include "trace-s390.h"
56
#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
/*
 * Size in bytes of a buffer holding (KVM_MAX_VCPUS + LOCAL_IRQS) entries
 * of struct kvm_s390_irq.
 */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))

/*
 * Expands to two comma separated initializer values: the offset of the
 * counter stat.x inside struct kvm_vcpu, followed by KVM_STAT_VCPU.
 */
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
63
/*
 * Table of statistics entries: each entry pairs a name with the location
 * of a per-vcpu counter (see VCPU_STAT). The list is terminated by an
 * entry whose name is NULL.
 */
struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_pei", VCPU_STAT(exit_pei) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
        { "instruction_stsch", VCPU_STAT(instruction_stsch) },
        { "instruction_chsc", VCPU_STAT(instruction_chsc) },
        { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
        { "instruction_sie", VCPU_STAT(instruction_sie) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
        { "diagnose_10", VCPU_STAT(diagnose_10) },
        { "diagnose_44", VCPU_STAT(diagnose_44) },
        { "diagnose_9c", VCPU_STAT(diagnose_9c) },
        { "diagnose_258", VCPU_STAT(diagnose_258) },
        { "diagnose_308", VCPU_STAT(diagnose_308) },
        { "diagnose_500", VCPU_STAT(diagnose_500) },
        { NULL }
};
132
/*
 * Packed layout of an extended TOD clock value: a one byte epoch index,
 * the 64-bit TOD value and seven reserved bytes (16 bytes in total).
 * NOTE(review): presumably mirrors the format stored by the hardware's
 * extended store-clock instruction - confirm against the users.
 */
struct kvm_s390_tod_clock_ext {
        __u8 epoch_idx;
        __u64 tod;
        __u8 reserved[7];
} __packed;
138
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
/* registered with S_IRUGO, i.e. the parameter is exported read-only */
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/*
 * upper facilities limit for kvm
 *
 * Initialised from FACILITIES_KVM; array elements not covered by that
 * initializer start out as zero.
 */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
146
147 unsigned long kvm_s390_fac_list_mask_size(void)
148 {
149         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
150         return ARRAY_SIZE(kvm_s390_fac_list_mask);
151 }
152
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* gmap pte notifiers; callbacks are assigned in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* s390 debug feature handle, registered in kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
161
162 /* Section: not file related */
/*
 * Nothing has to be switched on for s390: virtualization support is
 * always available, so this unconditionally reports success (0).
 */
int kvm_arch_hardware_enable(void)
{
        const int rc = 0;

        return rc;
}
168
/*
 * Forward declaration: kvm_arch_hardware_setup() installs this function
 * as the gmap notifier callback before its definition is seen.
 */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end);
171
172 /*
173  * This callback is executed during stop_machine(). All CPUs are therefore
174  * temporarily stopped. In order not to change guest behavior, we have to
175  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
176  * so a CPU won't be stopped while calculating with the epoch.
177  */
178 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
179                           void *v)
180 {
181         struct kvm *kvm;
182         struct kvm_vcpu *vcpu;
183         int i;
184         unsigned long long *delta = v;
185
186         list_for_each_entry(kvm, &vm_list, vm_list) {
187                 kvm->arch.epoch -= *delta;
188                 kvm_for_each_vcpu(i, vcpu, kvm) {
189                         vcpu->arch.sie_block->epoch -= *delta;
190                         if (vcpu->arch.cputm_enabled)
191                                 vcpu->arch.cputm_start += *delta;
192                         if (vcpu->arch.vsie_block)
193                                 vcpu->arch.vsie_block->epoch -= *delta;
194                 }
195         }
196         return NOTIFY_OK;
197 }
198
/*
 * Notifier block dispatching to kvm_clock_sync(); it is added to
 * s390_epoch_delta_notifier in kvm_arch_hardware_setup().
 */
static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};
202
203 int kvm_arch_hardware_setup(void)
204 {
205         gmap_notifier.notifier_call = kvm_gmap_notifier;
206         gmap_register_pte_notifier(&gmap_notifier);
207         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
208         gmap_register_pte_notifier(&vsie_gmap_notifier);
209         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
210                                        &kvm_clock_notifier);
211         return 0;
212 }
213
214 void kvm_arch_hardware_unsetup(void)
215 {
216         gmap_unregister_pte_notifier(&gmap_notifier);
217         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
218         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
219                                          &kvm_clock_notifier);
220 }
221
222 static void allow_cpu_feat(unsigned long nr)
223 {
224         set_bit_inv(nr, kvm_s390_available_cpu_feat);
225 }
226
/*
 * Ask the PLO instruction whether function code @nr is available.
 *
 * Register 0 is loaded with @nr with 0x100 ORed in (the "test bit"
 * request referred to below). The ipm/srl pair then leaves the resulting
 * condition code in cc.
 *
 * Returns 1 if the condition code is 0, and 0 otherwise.
 */
static inline int plo_test_bit(unsigned char nr)
{
        /* bound to general register 0 */
        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
        int cc;

        asm volatile(
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : "d" (r0)
                : "cc");
        return cc == 0;
}
242
243 static void kvm_s390_cpu_feat_init(void)
244 {
245         int i;
246
247         for (i = 0; i < 256; ++i) {
248                 if (plo_test_bit(i))
249                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
250         }
251
252         if (test_facility(28)) /* TOD-clock steering */
253                 ptff(kvm_s390_available_subfunc.ptff,
254                      sizeof(kvm_s390_available_subfunc.ptff),
255                      PTFF_QAF);
256
257         if (test_facility(17)) { /* MSA */
258                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
259                               kvm_s390_available_subfunc.kmac);
260                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
261                               kvm_s390_available_subfunc.kmc);
262                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
263                               kvm_s390_available_subfunc.km);
264                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
265                               kvm_s390_available_subfunc.kimd);
266                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
267                               kvm_s390_available_subfunc.klmd);
268         }
269         if (test_facility(76)) /* MSA3 */
270                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
271                               kvm_s390_available_subfunc.pckmo);
272         if (test_facility(77)) { /* MSA4 */
273                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
274                               kvm_s390_available_subfunc.kmctr);
275                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
276                               kvm_s390_available_subfunc.kmf);
277                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
278                               kvm_s390_available_subfunc.kmo);
279                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
280                               kvm_s390_available_subfunc.pcc);
281         }
282         if (test_facility(57)) /* MSA5 */
283                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
284                               kvm_s390_available_subfunc.ppno);
285
286         if (test_facility(146)) /* MSA8 */
287                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
288                               kvm_s390_available_subfunc.kma);
289
290         if (MACHINE_HAS_ESOP)
291                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
292         /*
293          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
294          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
295          */
296         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
297             !test_facility(3) || !nested)
298                 return;
299         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
300         if (sclp.has_64bscao)
301                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
302         if (sclp.has_siif)
303                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
304         if (sclp.has_gpere)
305                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
306         if (sclp.has_gsls)
307                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
308         if (sclp.has_ib)
309                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
310         if (sclp.has_cei)
311                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
312         if (sclp.has_ibs)
313                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
314         if (sclp.has_kss)
315                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
316         /*
317          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
318          * all skey handling functions read/set the skey from the PGSTE
319          * instead of the real storage key.
320          *
321          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
322          * pages being detected as preserved although they are resident.
323          *
324          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
325          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
326          *
327          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
328          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
329          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
330          *
331          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
332          * cannot easily shadow the SCA because of the ipte lock.
333          */
334 }
335
336 int kvm_arch_init(void *opaque)
337 {
338         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
339         if (!kvm_s390_dbf)
340                 return -ENOMEM;
341
342         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
343                 debug_unregister(kvm_s390_dbf);
344                 return -ENOMEM;
345         }
346
347         kvm_s390_cpu_feat_init();
348
349         /* Register floating interrupt controller interface. */
350         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
351 }
352
353 void kvm_arch_exit(void)
354 {
355         debug_unregister(kvm_s390_dbf);
356 }
357
358 /* Section: device related */
359 long kvm_arch_dev_ioctl(struct file *filp,
360                         unsigned int ioctl, unsigned long arg)
361 {
362         if (ioctl == KVM_S390_ENABLE_SIE)
363                 return s390_enable_sie();
364         return -EINVAL;
365 }
366
367 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
368 {
369         int r;
370
371         switch (ext) {
372         case KVM_CAP_S390_PSW:
373         case KVM_CAP_S390_GMAP:
374         case KVM_CAP_SYNC_MMU:
375 #ifdef CONFIG_KVM_S390_UCONTROL
376         case KVM_CAP_S390_UCONTROL:
377 #endif
378         case KVM_CAP_ASYNC_PF:
379         case KVM_CAP_SYNC_REGS:
380         case KVM_CAP_ONE_REG:
381         case KVM_CAP_ENABLE_CAP:
382         case KVM_CAP_S390_CSS_SUPPORT:
383         case KVM_CAP_IOEVENTFD:
384         case KVM_CAP_DEVICE_CTRL:
385         case KVM_CAP_ENABLE_CAP_VM:
386         case KVM_CAP_S390_IRQCHIP:
387         case KVM_CAP_VM_ATTRIBUTES:
388         case KVM_CAP_MP_STATE:
389         case KVM_CAP_IMMEDIATE_EXIT:
390         case KVM_CAP_S390_INJECT_IRQ:
391         case KVM_CAP_S390_USER_SIGP:
392         case KVM_CAP_S390_USER_STSI:
393         case KVM_CAP_S390_SKEYS:
394         case KVM_CAP_S390_IRQ_STATE:
395         case KVM_CAP_S390_USER_INSTR0:
396         case KVM_CAP_S390_CMMA_MIGRATION:
397         case KVM_CAP_S390_AIS:
398         case KVM_CAP_S390_AIS_MIGRATION:
399                 r = 1;
400                 break;
401         case KVM_CAP_S390_MEM_OP:
402                 r = MEM_OP_MAX_SIZE;
403                 break;
404         case KVM_CAP_NR_VCPUS:
405         case KVM_CAP_MAX_VCPUS:
406                 r = KVM_S390_BSCA_CPU_SLOTS;
407                 if (!kvm_s390_use_sca_entries())
408                         r = KVM_MAX_VCPUS;
409                 else if (sclp.has_esca && sclp.has_64bscao)
410                         r = KVM_S390_ESCA_CPU_SLOTS;
411                 break;
412         case KVM_CAP_NR_MEMSLOTS:
413                 r = KVM_USER_MEM_SLOTS;
414                 break;
415         case KVM_CAP_S390_COW:
416                 r = MACHINE_HAS_ESOP;
417                 break;
418         case KVM_CAP_S390_VECTOR_REGISTERS:
419                 r = MACHINE_HAS_VX;
420                 break;
421         case KVM_CAP_S390_RI:
422                 r = test_facility(64);
423                 break;
424         case KVM_CAP_S390_GS:
425                 r = test_facility(133);
426                 break;
427         default:
428                 r = 0;
429         }
430         return r;
431 }
432
433 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
434                                         struct kvm_memory_slot *memslot)
435 {
436         gfn_t cur_gfn, last_gfn;
437         unsigned long address;
438         struct gmap *gmap = kvm->arch.gmap;
439
440         /* Loop over all guest pages */
441         last_gfn = memslot->base_gfn + memslot->npages;
442         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
443                 address = gfn_to_hva_memslot(memslot, cur_gfn);
444
445                 if (test_and_clear_guest_dirty(gmap->mm, address))
446                         mark_page_dirty(kvm, cur_gfn);
447                 if (fatal_signal_pending(current))
448                         return;
449                 cond_resched();
450         }
451 }
452
453 /* Section: vm related */
/* Forward declaration; the definition is not part of this section. */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
455
456 /*
457  * Get (and clear) the dirty memory log for a memory slot.
458  */
459 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
460                                struct kvm_dirty_log *log)
461 {
462         int r;
463         unsigned long n;
464         struct kvm_memslots *slots;
465         struct kvm_memory_slot *memslot;
466         int is_dirty = 0;
467
468         if (kvm_is_ucontrol(kvm))
469                 return -EINVAL;
470
471         mutex_lock(&kvm->slots_lock);
472
473         r = -EINVAL;
474         if (log->slot >= KVM_USER_MEM_SLOTS)
475                 goto out;
476
477         slots = kvm_memslots(kvm);
478         memslot = id_to_memslot(slots, log->slot);
479         r = -ENOENT;
480         if (!memslot->dirty_bitmap)
481                 goto out;
482
483         kvm_s390_sync_dirty_log(kvm, memslot);
484         r = kvm_get_dirty_log(kvm, log, &is_dirty);
485         if (r)
486                 goto out;
487
488         /* Clear the dirty log */
489         if (is_dirty) {
490                 n = kvm_dirty_bitmap_bytes(memslot);
491                 memset(memslot->dirty_bitmap, 0, n);
492         }
493         r = 0;
494 out:
495         mutex_unlock(&kvm->slots_lock);
496         return r;
497 }
498
499 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
500 {
501         unsigned int i;
502         struct kvm_vcpu *vcpu;
503
504         kvm_for_each_vcpu(i, vcpu, kvm) {
505                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
506         }
507 }
508
509 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
510 {
511         int r;
512
513         if (cap->flags)
514                 return -EINVAL;
515
516         switch (cap->cap) {
517         case KVM_CAP_S390_IRQCHIP:
518                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
519                 kvm->arch.use_irqchip = 1;
520                 r = 0;
521                 break;
522         case KVM_CAP_S390_USER_SIGP:
523                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
524                 kvm->arch.user_sigp = 1;
525                 r = 0;
526                 break;
527         case KVM_CAP_S390_VECTOR_REGISTERS:
528                 mutex_lock(&kvm->lock);
529                 if (kvm->created_vcpus) {
530                         r = -EBUSY;
531                 } else if (MACHINE_HAS_VX) {
532                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
533                         set_kvm_facility(kvm->arch.model.fac_list, 129);
534                         if (test_facility(134)) {
535                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
536                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
537                         }
538                         if (test_facility(135)) {
539                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
540                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
541                         }
542                         r = 0;
543                 } else
544                         r = -EINVAL;
545                 mutex_unlock(&kvm->lock);
546                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
547                          r ? "(not available)" : "(success)");
548                 break;
549         case KVM_CAP_S390_RI:
550                 r = -EINVAL;
551                 mutex_lock(&kvm->lock);
552                 if (kvm->created_vcpus) {
553                         r = -EBUSY;
554                 } else if (test_facility(64)) {
555                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
556                         set_kvm_facility(kvm->arch.model.fac_list, 64);
557                         r = 0;
558                 }
559                 mutex_unlock(&kvm->lock);
560                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
561                          r ? "(not available)" : "(success)");
562                 break;
563         case KVM_CAP_S390_AIS:
564                 mutex_lock(&kvm->lock);
565                 if (kvm->created_vcpus) {
566                         r = -EBUSY;
567                 } else {
568                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
569                         set_kvm_facility(kvm->arch.model.fac_list, 72);
570                         r = 0;
571                 }
572                 mutex_unlock(&kvm->lock);
573                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
574                          r ? "(not available)" : "(success)");
575                 break;
576         case KVM_CAP_S390_GS:
577                 r = -EINVAL;
578                 mutex_lock(&kvm->lock);
579                 if (atomic_read(&kvm->online_vcpus)) {
580                         r = -EBUSY;
581                 } else if (test_facility(133)) {
582                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
583                         set_kvm_facility(kvm->arch.model.fac_list, 133);
584                         r = 0;
585                 }
586                 mutex_unlock(&kvm->lock);
587                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
588                          r ? "(not available)" : "(success)");
589                 break;
590         case KVM_CAP_S390_USER_STSI:
591                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
592                 kvm->arch.user_stsi = 1;
593                 r = 0;
594                 break;
595         case KVM_CAP_S390_USER_INSTR0:
596                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
597                 kvm->arch.user_instr0 = 1;
598                 icpt_operexc_on_all_vcpus(kvm);
599                 r = 0;
600                 break;
601         default:
602                 r = -EINVAL;
603                 break;
604         }
605         return r;
606 }
607
608 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
609 {
610         int ret;
611
612         switch (attr->attr) {
613         case KVM_S390_VM_MEM_LIMIT_SIZE:
614                 ret = 0;
615                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
616                          kvm->arch.mem_limit);
617                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
618                         ret = -EFAULT;
619                 break;
620         default:
621                 ret = -ENXIO;
622                 break;
623         }
624         return ret;
625 }
626
627 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
628 {
629         int ret;
630         unsigned int idx;
631         switch (attr->attr) {
632         case KVM_S390_VM_MEM_ENABLE_CMMA:
633                 ret = -ENXIO;
634                 if (!sclp.has_cmma)
635                         break;
636
637                 ret = -EBUSY;
638                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
639                 mutex_lock(&kvm->lock);
640                 if (!kvm->created_vcpus) {
641                         kvm->arch.use_cmma = 1;
642                         ret = 0;
643                 }
644                 mutex_unlock(&kvm->lock);
645                 break;
646         case KVM_S390_VM_MEM_CLR_CMMA:
647                 ret = -ENXIO;
648                 if (!sclp.has_cmma)
649                         break;
650                 ret = -EINVAL;
651                 if (!kvm->arch.use_cmma)
652                         break;
653
654                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
655                 mutex_lock(&kvm->lock);
656                 idx = srcu_read_lock(&kvm->srcu);
657                 s390_reset_cmma(kvm->arch.gmap->mm);
658                 srcu_read_unlock(&kvm->srcu, idx);
659                 mutex_unlock(&kvm->lock);
660                 ret = 0;
661                 break;
662         case KVM_S390_VM_MEM_LIMIT_SIZE: {
663                 unsigned long new_limit;
664
665                 if (kvm_is_ucontrol(kvm))
666                         return -EINVAL;
667
668                 if (get_user(new_limit, (u64 __user *)attr->addr))
669                         return -EFAULT;
670
671                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
672                     new_limit > kvm->arch.mem_limit)
673                         return -E2BIG;
674
675                 if (!new_limit)
676                         return -EINVAL;
677
678                 /* gmap_create takes last usable address */
679                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
680                         new_limit -= 1;
681
682                 ret = -EBUSY;
683                 mutex_lock(&kvm->lock);
684                 if (!kvm->created_vcpus) {
685                         /* gmap_create will round the limit up */
686                         struct gmap *new = gmap_create(current->mm, new_limit);
687
688                         if (!new) {
689                                 ret = -ENOMEM;
690                         } else {
691                                 gmap_remove(kvm->arch.gmap);
692                                 new->private = kvm;
693                                 kvm->arch.gmap = new;
694                                 ret = 0;
695                         }
696                 }
697                 mutex_unlock(&kvm->lock);
698                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
699                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
700                          (void *) kvm->arch.gmap->asce);
701                 break;
702         }
703         default:
704                 ret = -ENXIO;
705                 break;
706         }
707         return ret;
708 }
709
/* Forward declaration: called by kvm_s390_vm_set_crypto() for every vcpu. */
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
711
712 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
713 {
714         struct kvm_vcpu *vcpu;
715         int i;
716
717         if (!test_kvm_facility(kvm, 76))
718                 return -EINVAL;
719
720         mutex_lock(&kvm->lock);
721         switch (attr->attr) {
722         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
723                 get_random_bytes(
724                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
725                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
726                 kvm->arch.crypto.aes_kw = 1;
727                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
728                 break;
729         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
730                 get_random_bytes(
731                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
732                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
733                 kvm->arch.crypto.dea_kw = 1;
734                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
735                 break;
736         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
737                 kvm->arch.crypto.aes_kw = 0;
738                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
739                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
740                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
741                 break;
742         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
743                 kvm->arch.crypto.dea_kw = 0;
744                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
745                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
746                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
747                 break;
748         default:
749                 mutex_unlock(&kvm->lock);
750                 return -ENXIO;
751         }
752
753         kvm_for_each_vcpu(i, vcpu, kvm) {
754                 kvm_s390_vcpu_crypto_setup(vcpu);
755                 exit_sie(vcpu);
756         }
757         mutex_unlock(&kvm->lock);
758         return 0;
759 }
760
761 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
762 {
763         int cx;
764         struct kvm_vcpu *vcpu;
765
766         kvm_for_each_vcpu(cx, vcpu, kvm)
767                 kvm_s390_sync_request(req, vcpu);
768 }
769
770 /*
771  * Must be called with kvm->srcu held to avoid races on memslots, and with
772  * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
773  */
774 static int kvm_s390_vm_start_migration(struct kvm *kvm)
775 {
776         struct kvm_s390_migration_state *mgs;
777         struct kvm_memory_slot *ms;
778         /* should be the only one */
779         struct kvm_memslots *slots;
780         unsigned long ram_pages;
781         int slotnr;
782
783         /* migration mode already enabled */
784         if (kvm->arch.migration_state)
785                 return 0;
786
787         slots = kvm_memslots(kvm);
788         if (!slots || !slots->used_slots)
789                 return -EINVAL;
790
791         mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
792         if (!mgs)
793                 return -ENOMEM;
794         kvm->arch.migration_state = mgs;
795
796         if (kvm->arch.use_cmma) {
797                 /*
798                  * Get the last slot. They should be sorted by base_gfn, so the
799                  * last slot is also the one at the end of the address space.
800                  * We have verified above that at least one slot is present.
801                  */
802                 ms = slots->memslots + slots->used_slots - 1;
803                 /* round up so we only use full longs */
804                 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
805                 /* allocate enough bytes to store all the bits */
806                 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
807                 if (!mgs->pgste_bitmap) {
808                         kfree(mgs);
809                         kvm->arch.migration_state = NULL;
810                         return -ENOMEM;
811                 }
812
813                 mgs->bitmap_size = ram_pages;
814                 atomic64_set(&mgs->dirty_pages, ram_pages);
815                 /* mark all the pages in active slots as dirty */
816                 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
817                         ms = slots->memslots + slotnr;
818                         bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
819                 }
820
821                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
822         }
823         return 0;
824 }
825
826 /*
827  * Must be called with kvm->lock to avoid races with ourselves and
828  * kvm_s390_vm_start_migration.
829  */
830 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
831 {
832         struct kvm_s390_migration_state *mgs;
833
834         /* migration mode already disabled */
835         if (!kvm->arch.migration_state)
836                 return 0;
837         mgs = kvm->arch.migration_state;
838         kvm->arch.migration_state = NULL;
839
840         if (kvm->arch.use_cmma) {
841                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
842                 vfree(mgs->pgste_bitmap);
843         }
844         kfree(mgs);
845         return 0;
846 }
847
848 static int kvm_s390_vm_set_migration(struct kvm *kvm,
849                                      struct kvm_device_attr *attr)
850 {
851         int idx, res = -ENXIO;
852
853         mutex_lock(&kvm->lock);
854         switch (attr->attr) {
855         case KVM_S390_VM_MIGRATION_START:
856                 idx = srcu_read_lock(&kvm->srcu);
857                 res = kvm_s390_vm_start_migration(kvm);
858                 srcu_read_unlock(&kvm->srcu, idx);
859                 break;
860         case KVM_S390_VM_MIGRATION_STOP:
861                 res = kvm_s390_vm_stop_migration(kvm);
862                 break;
863         default:
864                 break;
865         }
866         mutex_unlock(&kvm->lock);
867
868         return res;
869 }
870
871 static int kvm_s390_vm_get_migration(struct kvm *kvm,
872                                      struct kvm_device_attr *attr)
873 {
874         u64 mig = (kvm->arch.migration_state != NULL);
875
876         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
877                 return -ENXIO;
878
879         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
880                 return -EFAULT;
881         return 0;
882 }
883
884 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
885 {
886         struct kvm_s390_vm_tod_clock gtod;
887
888         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
889                 return -EFAULT;
890
891         if (test_kvm_facility(kvm, 139))
892                 kvm_s390_set_tod_clock_ext(kvm, &gtod);
893         else if (gtod.epoch_idx == 0)
894                 kvm_s390_set_tod_clock(kvm, gtod.tod);
895         else
896                 return -EINVAL;
897
898         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
899                 gtod.epoch_idx, gtod.tod);
900
901         return 0;
902 }
903
904 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
905 {
906         u8 gtod_high;
907
908         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
909                                            sizeof(gtod_high)))
910                 return -EFAULT;
911
912         if (gtod_high != 0)
913                 return -EINVAL;
914         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
915
916         return 0;
917 }
918
919 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
920 {
921         u64 gtod;
922
923         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
924                 return -EFAULT;
925
926         kvm_s390_set_tod_clock(kvm, gtod);
927         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
928         return 0;
929 }
930
931 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
932 {
933         int ret;
934
935         if (attr->flags)
936                 return -EINVAL;
937
938         switch (attr->attr) {
939         case KVM_S390_VM_TOD_EXT:
940                 ret = kvm_s390_set_tod_ext(kvm, attr);
941                 break;
942         case KVM_S390_VM_TOD_HIGH:
943                 ret = kvm_s390_set_tod_high(kvm, attr);
944                 break;
945         case KVM_S390_VM_TOD_LOW:
946                 ret = kvm_s390_set_tod_low(kvm, attr);
947                 break;
948         default:
949                 ret = -ENXIO;
950                 break;
951         }
952         return ret;
953 }
954
955 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
956                                         struct kvm_s390_vm_tod_clock *gtod)
957 {
958         struct kvm_s390_tod_clock_ext htod;
959
960         preempt_disable();
961
962         get_tod_clock_ext((char *)&htod);
963
964         gtod->tod = htod.tod + kvm->arch.epoch;
965         gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
966
967         if (gtod->tod < htod.tod)
968                 gtod->epoch_idx += 1;
969
970         preempt_enable();
971 }
972
973 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
974 {
975         struct kvm_s390_vm_tod_clock gtod;
976
977         memset(&gtod, 0, sizeof(gtod));
978
979         if (test_kvm_facility(kvm, 139))
980                 kvm_s390_get_tod_clock_ext(kvm, &gtod);
981         else
982                 gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
983
984         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
985                 return -EFAULT;
986
987         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
988                 gtod.epoch_idx, gtod.tod);
989         return 0;
990 }
991
992 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
993 {
994         u8 gtod_high = 0;
995
996         if (copy_to_user((void __user *)attr->addr, &gtod_high,
997                                          sizeof(gtod_high)))
998                 return -EFAULT;
999         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1000
1001         return 0;
1002 }
1003
1004 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1005 {
1006         u64 gtod;
1007
1008         gtod = kvm_s390_get_tod_clock_fast(kvm);
1009         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1010                 return -EFAULT;
1011         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1012
1013         return 0;
1014 }
1015
1016 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1017 {
1018         int ret;
1019
1020         if (attr->flags)
1021                 return -EINVAL;
1022
1023         switch (attr->attr) {
1024         case KVM_S390_VM_TOD_EXT:
1025                 ret = kvm_s390_get_tod_ext(kvm, attr);
1026                 break;
1027         case KVM_S390_VM_TOD_HIGH:
1028                 ret = kvm_s390_get_tod_high(kvm, attr);
1029                 break;
1030         case KVM_S390_VM_TOD_LOW:
1031                 ret = kvm_s390_get_tod_low(kvm, attr);
1032                 break;
1033         default:
1034                 ret = -ENXIO;
1035                 break;
1036         }
1037         return ret;
1038 }
1039
1040 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1041 {
1042         struct kvm_s390_vm_cpu_processor *proc;
1043         u16 lowest_ibc, unblocked_ibc;
1044         int ret = 0;
1045
1046         mutex_lock(&kvm->lock);
1047         if (kvm->created_vcpus) {
1048                 ret = -EBUSY;
1049                 goto out;
1050         }
1051         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1052         if (!proc) {
1053                 ret = -ENOMEM;
1054                 goto out;
1055         }
1056         if (!copy_from_user(proc, (void __user *)attr->addr,
1057                             sizeof(*proc))) {
1058                 kvm->arch.model.cpuid = proc->cpuid;
1059                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1060                 unblocked_ibc = sclp.ibc & 0xfff;
1061                 if (lowest_ibc && proc->ibc) {
1062                         if (proc->ibc > unblocked_ibc)
1063                                 kvm->arch.model.ibc = unblocked_ibc;
1064                         else if (proc->ibc < lowest_ibc)
1065                                 kvm->arch.model.ibc = lowest_ibc;
1066                         else
1067                                 kvm->arch.model.ibc = proc->ibc;
1068                 }
1069                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1070                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1071                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1072                          kvm->arch.model.ibc,
1073                          kvm->arch.model.cpuid);
1074                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1075                          kvm->arch.model.fac_list[0],
1076                          kvm->arch.model.fac_list[1],
1077                          kvm->arch.model.fac_list[2]);
1078         } else
1079                 ret = -EFAULT;
1080         kfree(proc);
1081 out:
1082         mutex_unlock(&kvm->lock);
1083         return ret;
1084 }
1085
1086 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1087                                        struct kvm_device_attr *attr)
1088 {
1089         struct kvm_s390_vm_cpu_feat data;
1090         int ret = -EBUSY;
1091
1092         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1093                 return -EFAULT;
1094         if (!bitmap_subset((unsigned long *) data.feat,
1095                            kvm_s390_available_cpu_feat,
1096                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1097                 return -EINVAL;
1098
1099         mutex_lock(&kvm->lock);
1100         if (!atomic_read(&kvm->online_vcpus)) {
1101                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1102                             KVM_S390_VM_CPU_FEAT_NR_BITS);
1103                 ret = 0;
1104         }
1105         mutex_unlock(&kvm->lock);
1106         return ret;
1107 }
1108
1109 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1110                                           struct kvm_device_attr *attr)
1111 {
1112         /*
1113          * Once supported by kernel + hw, we have to store the subfunctions
1114          * in kvm->arch and remember that user space configured them.
1115          */
1116         return -ENXIO;
1117 }
1118
1119 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1120 {
1121         int ret = -ENXIO;
1122
1123         switch (attr->attr) {
1124         case KVM_S390_VM_CPU_PROCESSOR:
1125                 ret = kvm_s390_set_processor(kvm, attr);
1126                 break;
1127         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1128                 ret = kvm_s390_set_processor_feat(kvm, attr);
1129                 break;
1130         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1131                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1132                 break;
1133         }
1134         return ret;
1135 }
1136
1137 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1138 {
1139         struct kvm_s390_vm_cpu_processor *proc;
1140         int ret = 0;
1141
1142         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1143         if (!proc) {
1144                 ret = -ENOMEM;
1145                 goto out;
1146         }
1147         proc->cpuid = kvm->arch.model.cpuid;
1148         proc->ibc = kvm->arch.model.ibc;
1149         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1150                S390_ARCH_FAC_LIST_SIZE_BYTE);
1151         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1152                  kvm->arch.model.ibc,
1153                  kvm->arch.model.cpuid);
1154         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1155                  kvm->arch.model.fac_list[0],
1156                  kvm->arch.model.fac_list[1],
1157                  kvm->arch.model.fac_list[2]);
1158         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1159                 ret = -EFAULT;
1160         kfree(proc);
1161 out:
1162         return ret;
1163 }
1164
1165 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1166 {
1167         struct kvm_s390_vm_cpu_machine *mach;
1168         int ret = 0;
1169
1170         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1171         if (!mach) {
1172                 ret = -ENOMEM;
1173                 goto out;
1174         }
1175         get_cpu_id((struct cpuid *) &mach->cpuid);
1176         mach->ibc = sclp.ibc;
1177         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1178                S390_ARCH_FAC_LIST_SIZE_BYTE);
1179         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1180                sizeof(S390_lowcore.stfle_fac_list));
1181         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1182                  kvm->arch.model.ibc,
1183                  kvm->arch.model.cpuid);
1184         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1185                  mach->fac_mask[0],
1186                  mach->fac_mask[1],
1187                  mach->fac_mask[2]);
1188         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1189                  mach->fac_list[0],
1190                  mach->fac_list[1],
1191                  mach->fac_list[2]);
1192         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1193                 ret = -EFAULT;
1194         kfree(mach);
1195 out:
1196         return ret;
1197 }
1198
1199 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1200                                        struct kvm_device_attr *attr)
1201 {
1202         struct kvm_s390_vm_cpu_feat data;
1203
1204         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1205                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1206         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1207                 return -EFAULT;
1208         return 0;
1209 }
1210
1211 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1212                                      struct kvm_device_attr *attr)
1213 {
1214         struct kvm_s390_vm_cpu_feat data;
1215
1216         bitmap_copy((unsigned long *) data.feat,
1217                     kvm_s390_available_cpu_feat,
1218                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1219         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1220                 return -EFAULT;
1221         return 0;
1222 }
1223
1224 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1225                                           struct kvm_device_attr *attr)
1226 {
1227         /*
1228          * Once we can actually configure subfunctions (kernel + hw support),
1229          * we have to check if they were already set by user space, if so copy
1230          * them from kvm->arch.
1231          */
1232         return -ENXIO;
1233 }
1234
1235 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1236                                         struct kvm_device_attr *attr)
1237 {
1238         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1239             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1240                 return -EFAULT;
1241         return 0;
1242 }
1243 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1244 {
1245         int ret = -ENXIO;
1246
1247         switch (attr->attr) {
1248         case KVM_S390_VM_CPU_PROCESSOR:
1249                 ret = kvm_s390_get_processor(kvm, attr);
1250                 break;
1251         case KVM_S390_VM_CPU_MACHINE:
1252                 ret = kvm_s390_get_machine(kvm, attr);
1253                 break;
1254         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1255                 ret = kvm_s390_get_processor_feat(kvm, attr);
1256                 break;
1257         case KVM_S390_VM_CPU_MACHINE_FEAT:
1258                 ret = kvm_s390_get_machine_feat(kvm, attr);
1259                 break;
1260         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1261                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1262                 break;
1263         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1264                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1265                 break;
1266         }
1267         return ret;
1268 }
1269
1270 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1271 {
1272         int ret;
1273
1274         switch (attr->group) {
1275         case KVM_S390_VM_MEM_CTRL:
1276                 ret = kvm_s390_set_mem_control(kvm, attr);
1277                 break;
1278         case KVM_S390_VM_TOD:
1279                 ret = kvm_s390_set_tod(kvm, attr);
1280                 break;
1281         case KVM_S390_VM_CPU_MODEL:
1282                 ret = kvm_s390_set_cpu_model(kvm, attr);
1283                 break;
1284         case KVM_S390_VM_CRYPTO:
1285                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1286                 break;
1287         case KVM_S390_VM_MIGRATION:
1288                 ret = kvm_s390_vm_set_migration(kvm, attr);
1289                 break;
1290         default:
1291                 ret = -ENXIO;
1292                 break;
1293         }
1294
1295         return ret;
1296 }
1297
1298 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1299 {
1300         int ret;
1301
1302         switch (attr->group) {
1303         case KVM_S390_VM_MEM_CTRL:
1304                 ret = kvm_s390_get_mem_control(kvm, attr);
1305                 break;
1306         case KVM_S390_VM_TOD:
1307                 ret = kvm_s390_get_tod(kvm, attr);
1308                 break;
1309         case KVM_S390_VM_CPU_MODEL:
1310                 ret = kvm_s390_get_cpu_model(kvm, attr);
1311                 break;
1312         case KVM_S390_VM_MIGRATION:
1313                 ret = kvm_s390_vm_get_migration(kvm, attr);
1314                 break;
1315         default:
1316                 ret = -ENXIO;
1317                 break;
1318         }
1319
1320         return ret;
1321 }
1322
1323 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1324 {
1325         int ret;
1326
1327         switch (attr->group) {
1328         case KVM_S390_VM_MEM_CTRL:
1329                 switch (attr->attr) {
1330                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1331                 case KVM_S390_VM_MEM_CLR_CMMA:
1332                         ret = sclp.has_cmma ? 0 : -ENXIO;
1333                         break;
1334                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1335                         ret = 0;
1336                         break;
1337                 default:
1338                         ret = -ENXIO;
1339                         break;
1340                 }
1341                 break;
1342         case KVM_S390_VM_TOD:
1343                 switch (attr->attr) {
1344                 case KVM_S390_VM_TOD_LOW:
1345                 case KVM_S390_VM_TOD_HIGH:
1346                         ret = 0;
1347                         break;
1348                 default:
1349                         ret = -ENXIO;
1350                         break;
1351                 }
1352                 break;
1353         case KVM_S390_VM_CPU_MODEL:
1354                 switch (attr->attr) {
1355                 case KVM_S390_VM_CPU_PROCESSOR:
1356                 case KVM_S390_VM_CPU_MACHINE:
1357                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1358                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1359                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1360                         ret = 0;
1361                         break;
1362                 /* configuring subfunctions is not supported yet */
1363                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1364                 default:
1365                         ret = -ENXIO;
1366                         break;
1367                 }
1368                 break;
1369         case KVM_S390_VM_CRYPTO:
1370                 switch (attr->attr) {
1371                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1372                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1373                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1374                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1375                         ret = 0;
1376                         break;
1377                 default:
1378                         ret = -ENXIO;
1379                         break;
1380                 }
1381                 break;
1382         case KVM_S390_VM_MIGRATION:
1383                 ret = 0;
1384                 break;
1385         default:
1386                 ret = -ENXIO;
1387                 break;
1388         }
1389
1390         return ret;
1391 }
1392
1393 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1394 {
1395         uint8_t *keys;
1396         uint64_t hva;
1397         int srcu_idx, i, r = 0;
1398
1399         if (args->flags != 0)
1400                 return -EINVAL;
1401
1402         /* Is this guest using storage keys? */
1403         if (!mm_use_skey(current->mm))
1404                 return KVM_S390_GET_SKEYS_NONE;
1405
1406         /* Enforce sane limit on memory allocation */
1407         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1408                 return -EINVAL;
1409
1410         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1411         if (!keys)
1412                 return -ENOMEM;
1413
1414         down_read(&current->mm->mmap_sem);
1415         srcu_idx = srcu_read_lock(&kvm->srcu);
1416         for (i = 0; i < args->count; i++) {
1417                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1418                 if (kvm_is_error_hva(hva)) {
1419                         r = -EFAULT;
1420                         break;
1421                 }
1422
1423                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1424                 if (r)
1425                         break;
1426         }
1427         srcu_read_unlock(&kvm->srcu, srcu_idx);
1428         up_read(&current->mm->mmap_sem);
1429
1430         if (!r) {
1431                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1432                                  sizeof(uint8_t) * args->count);
1433                 if (r)
1434                         r = -EFAULT;
1435         }
1436
1437         kvfree(keys);
1438         return r;
1439 }
1440
1441 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1442 {
1443         uint8_t *keys;
1444         uint64_t hva;
1445         int srcu_idx, i, r = 0;
1446
1447         if (args->flags != 0)
1448                 return -EINVAL;
1449
1450         /* Enforce sane limit on memory allocation */
1451         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1452                 return -EINVAL;
1453
1454         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1455         if (!keys)
1456                 return -ENOMEM;
1457
1458         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1459                            sizeof(uint8_t) * args->count);
1460         if (r) {
1461                 r = -EFAULT;
1462                 goto out;
1463         }
1464
1465         /* Enable storage key handling for the guest */
1466         r = s390_enable_skey();
1467         if (r)
1468                 goto out;
1469
1470         down_read(&current->mm->mmap_sem);
1471         srcu_idx = srcu_read_lock(&kvm->srcu);
1472         for (i = 0; i < args->count; i++) {
1473                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1474                 if (kvm_is_error_hva(hva)) {
1475                         r = -EFAULT;
1476                         break;
1477                 }
1478
1479                 /* Lowest order bit is reserved */
1480                 if (keys[i] & 0x01) {
1481                         r = -EINVAL;
1482                         break;
1483                 }
1484
1485                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1486                 if (r)
1487                         break;
1488         }
1489         srcu_read_unlock(&kvm->srcu, srcu_idx);
1490         up_read(&current->mm->mmap_sem);
1491 out:
1492         kvfree(keys);
1493         return r;
1494 }
1495
/*
 * Every block that is sent has to start with its base address and length.
 * Sending a few clean values along is therefore cheaper than starting a new
 * block, as long as the clean stretch is shorter than the size of two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (sizeof(void *) * 2)
/* upper bound for a CMMA buffer; same value as the storage key limit */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1504
1505 /*
1506  * This function searches for the next page with dirty CMMA attributes, and
1507  * saves the attributes in the buffer up to either the end of the buffer or
1508  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1509  * no trailing clean bytes are saved.
1510  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1511  * output buffer will indicate 0 as length.
1512  */
1513 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1514                                   struct kvm_s390_cmma_log *args)
1515 {
1516         struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1517         unsigned long bufsize, hva, pgstev, i, next, cur;
1518         int srcu_idx, peek, r = 0, rr;
1519         u8 *res;
1520
1521         cur = args->start_gfn;
1522         i = next = pgstev = 0;
1523
1524         if (unlikely(!kvm->arch.use_cmma))
1525                 return -ENXIO;
1526         /* Invalid/unsupported flags were specified */
1527         if (args->flags & ~KVM_S390_CMMA_PEEK)
1528                 return -EINVAL;
1529         /* Migration mode query, and we are not doing a migration */
1530         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1531         if (!peek && !s)
1532                 return -EINVAL;
1533         /* CMMA is disabled or was not used, or the buffer has length zero */
1534         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1535         if (!bufsize || !kvm->mm->context.use_cmma) {
1536                 memset(args, 0, sizeof(*args));
1537                 return 0;
1538         }
1539
1540         if (!peek) {
1541                 /* We are not peeking, and there are no dirty pages */
1542                 if (!atomic64_read(&s->dirty_pages)) {
1543                         memset(args, 0, sizeof(*args));
1544                         return 0;
1545                 }
1546                 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1547                                     args->start_gfn);
1548                 if (cur >= s->bitmap_size)      /* nothing found, loop back */
1549                         cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1550                 if (cur >= s->bitmap_size) {    /* again! (very unlikely) */
1551                         memset(args, 0, sizeof(*args));
1552                         return 0;
1553                 }
1554                 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1555         }
1556
1557         res = vmalloc(bufsize);
1558         if (!res)
1559                 return -ENOMEM;
1560
1561         args->start_gfn = cur;
1562
1563         down_read(&kvm->mm->mmap_sem);
1564         srcu_idx = srcu_read_lock(&kvm->srcu);
1565         while (i < bufsize) {
1566                 hva = gfn_to_hva(kvm, cur);
1567                 if (kvm_is_error_hva(hva)) {
1568                         r = -EFAULT;
1569                         break;
1570                 }
1571                 /* decrement only if we actually flipped the bit to 0 */
1572                 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1573                         atomic64_dec(&s->dirty_pages);
1574                 r = get_pgste(kvm->mm, hva, &pgstev);
1575                 if (r < 0)
1576                         pgstev = 0;
1577                 /* save the value */
1578                 res[i++] = (pgstev >> 24) & 0x43;
1579                 /*
1580                  * if the next bit is too far away, stop.
1581                  * if we reached the previous "next", find the next one
1582                  */
1583                 if (!peek) {
1584                         if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1585                                 break;
1586                         if (cur == next)
1587                                 next = find_next_bit(s->pgste_bitmap,
1588                                                      s->bitmap_size, cur + 1);
1589                 /* reached the end of the bitmap or of the buffer, stop */
1590                         if ((next >= s->bitmap_size) ||
1591                             (next >= args->start_gfn + bufsize))
1592                                 break;
1593                 }
1594                 cur++;
1595         }
1596         srcu_read_unlock(&kvm->srcu, srcu_idx);
1597         up_read(&kvm->mm->mmap_sem);
1598         args->count = i;
1599         args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1600
1601         rr = copy_to_user((void __user *)args->values, res, args->count);
1602         if (rr)
1603                 r = -EFAULT;
1604
1605         vfree(res);
1606         return r;
1607 }
1608
1609 /*
1610  * This function sets the CMMA attributes for the given pages. If the input
1611  * buffer has zero length, no action is taken, otherwise the attributes are
1612  * set and the mm->context.use_cmma flag is set.
1613  */
1614 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1615                                   const struct kvm_s390_cmma_log *args)
1616 {
1617         unsigned long hva, mask, pgstev, i;
1618         uint8_t *bits;
1619         int srcu_idx, r = 0;
1620
1621         mask = args->mask;
1622
1623         if (!kvm->arch.use_cmma)
1624                 return -ENXIO;
1625         /* invalid/unsupported flags */
1626         if (args->flags != 0)
1627                 return -EINVAL;
1628         /* Enforce sane limit on memory allocation */
1629         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1630                 return -EINVAL;
1631         /* Nothing to do */
1632         if (args->count == 0)
1633                 return 0;
1634
1635         bits = vmalloc(sizeof(*bits) * args->count);
1636         if (!bits)
1637                 return -ENOMEM;
1638
1639         r = copy_from_user(bits, (void __user *)args->values, args->count);
1640         if (r) {
1641                 r = -EFAULT;
1642                 goto out;
1643         }
1644
1645         down_read(&kvm->mm->mmap_sem);
1646         srcu_idx = srcu_read_lock(&kvm->srcu);
1647         for (i = 0; i < args->count; i++) {
1648                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1649                 if (kvm_is_error_hva(hva)) {
1650                         r = -EFAULT;
1651                         break;
1652                 }
1653
1654                 pgstev = bits[i];
1655                 pgstev = pgstev << 24;
1656                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1657                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1658         }
1659         srcu_read_unlock(&kvm->srcu, srcu_idx);
1660         up_read(&kvm->mm->mmap_sem);
1661
1662         if (!kvm->mm->context.use_cmma) {
1663                 down_write(&kvm->mm->mmap_sem);
1664                 kvm->mm->context.use_cmma = 1;
1665                 up_write(&kvm->mm->mmap_sem);
1666         }
1667 out:
1668         vfree(bits);
1669         return r;
1670 }
1671
1672 long kvm_arch_vm_ioctl(struct file *filp,
1673                        unsigned int ioctl, unsigned long arg)
1674 {
1675         struct kvm *kvm = filp->private_data;
1676         void __user *argp = (void __user *)arg;
1677         struct kvm_device_attr attr;
1678         int r;
1679
1680         switch (ioctl) {
1681         case KVM_S390_INTERRUPT: {
1682                 struct kvm_s390_interrupt s390int;
1683
1684                 r = -EFAULT;
1685                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1686                         break;
1687                 r = kvm_s390_inject_vm(kvm, &s390int);
1688                 break;
1689         }
1690         case KVM_ENABLE_CAP: {
1691                 struct kvm_enable_cap cap;
1692                 r = -EFAULT;
1693                 if (copy_from_user(&cap, argp, sizeof(cap)))
1694                         break;
1695                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1696                 break;
1697         }
1698         case KVM_CREATE_IRQCHIP: {
1699                 struct kvm_irq_routing_entry routing;
1700
1701                 r = -EINVAL;
1702                 if (kvm->arch.use_irqchip) {
1703                         /* Set up dummy routing. */
1704                         memset(&routing, 0, sizeof(routing));
1705                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1706                 }
1707                 break;
1708         }
1709         case KVM_SET_DEVICE_ATTR: {
1710                 r = -EFAULT;
1711                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1712                         break;
1713                 r = kvm_s390_vm_set_attr(kvm, &attr);
1714                 break;
1715         }
1716         case KVM_GET_DEVICE_ATTR: {
1717                 r = -EFAULT;
1718                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1719                         break;
1720                 r = kvm_s390_vm_get_attr(kvm, &attr);
1721                 break;
1722         }
1723         case KVM_HAS_DEVICE_ATTR: {
1724                 r = -EFAULT;
1725                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1726                         break;
1727                 r = kvm_s390_vm_has_attr(kvm, &attr);
1728                 break;
1729         }
1730         case KVM_S390_GET_SKEYS: {
1731                 struct kvm_s390_skeys args;
1732
1733                 r = -EFAULT;
1734                 if (copy_from_user(&args, argp,
1735                                    sizeof(struct kvm_s390_skeys)))
1736                         break;
1737                 r = kvm_s390_get_skeys(kvm, &args);
1738                 break;
1739         }
1740         case KVM_S390_SET_SKEYS: {
1741                 struct kvm_s390_skeys args;
1742
1743                 r = -EFAULT;
1744                 if (copy_from_user(&args, argp,
1745                                    sizeof(struct kvm_s390_skeys)))
1746                         break;
1747                 r = kvm_s390_set_skeys(kvm, &args);
1748                 break;
1749         }
1750         case KVM_S390_GET_CMMA_BITS: {
1751                 struct kvm_s390_cmma_log args;
1752
1753                 r = -EFAULT;
1754                 if (copy_from_user(&args, argp, sizeof(args)))
1755                         break;
1756                 r = kvm_s390_get_cmma_bits(kvm, &args);
1757                 if (!r) {
1758                         r = copy_to_user(argp, &args, sizeof(args));
1759                         if (r)
1760                                 r = -EFAULT;
1761                 }
1762                 break;
1763         }
1764         case KVM_S390_SET_CMMA_BITS: {
1765                 struct kvm_s390_cmma_log args;
1766
1767                 r = -EFAULT;
1768                 if (copy_from_user(&args, argp, sizeof(args)))
1769                         break;
1770                 r = kvm_s390_set_cmma_bits(kvm, &args);
1771                 break;
1772         }
1773         default:
1774                 r = -ENOTTY;
1775         }
1776
1777         return r;
1778 }
1779
/*
 * Issue PQAP(QCI) and store its result in @config.
 *
 * @config: caller-provided buffer; the first 128 bytes are zeroed before the
 *          instruction is issued.
 *
 * Returns the condition code extracted after the instruction, or 0 if the
 * code between labels 0 and 1 was skipped.
 */
static int kvm_s390_query_ap_config(u8 *config)
{
        u32 fcn_code = 0x04000000UL;
        u32 cc = 0;

        memset(config, 0, 128);
        /*
         * The function code goes into register 0 and the buffer address into
         * register 2; both are listed as clobbered. ipm/srl move the
         * condition code into the low bits of cc.
         * NOTE(review): the EX_TABLE entry presumably lets an exception on
         * the PQAP instruction resume at label 1 with cc still 0 - confirm.
         */
        asm volatile(
                "lgr 0,%1\n"
                "lgr 2,%2\n"
                ".long 0xb2af0000\n"            /* PQAP(QCI) */
                "0: ipm %0\n"
                "srl %0,28\n"
                "1:\n"
                EX_TABLE(0b, 1b)
                : "+r" (cc)
                : "r" (fcn_code), "r" (config)
                : "cc", "0", "2", "memory"
        );

        return cc;
}
1801
1802 static int kvm_s390_apxa_installed(void)
1803 {
1804         u8 config[128];
1805         int cc;
1806
1807         if (test_facility(12)) {
1808                 cc = kvm_s390_query_ap_config(config);
1809
1810                 if (cc)
1811                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1812                 else
1813                         return config[0] & 0x40;
1814         }
1815
1816         return 0;
1817 }
1818
1819 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1820 {
1821         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1822
1823         if (kvm_s390_apxa_installed())
1824                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1825         else
1826                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1827 }
1828
1829 static u64 kvm_s390_get_initial_cpuid(void)
1830 {
1831         struct cpuid cpuid;
1832
1833         get_cpu_id(&cpuid);
1834         cpuid.version = 0xff;
1835         return *((u64 *) &cpuid);
1836 }
1837
1838 static void kvm_s390_crypto_init(struct kvm *kvm)
1839 {
1840         if (!test_kvm_facility(kvm, 76))
1841                 return;
1842
1843         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1844         kvm_s390_set_crycb_format(kvm);
1845
1846         /* Enable AES/DEA protected key functions by default */
1847         kvm->arch.crypto.aes_kw = 1;
1848         kvm->arch.crypto.dea_kw = 1;
1849         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1850                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1851         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1852                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1853 }
1854
1855 static void sca_dispose(struct kvm *kvm)
1856 {
1857         if (kvm->arch.use_esca)
1858                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1859         else
1860                 free_page((unsigned long)(kvm->arch.sca));
1861         kvm->arch.sca = NULL;
1862 }
1863
/*
 * Architecture specific part of VM creation.
 *
 * @kvm:  the VM being created
 * @type: VM type; only KVM_VM_S390_UCONTROL may be set, and only with
 *        CONFIG_KVM_S390_UCONTROL and CAP_SYS_ADMIN
 *
 * Allocates the basic SCA, the debug feature area and sie_page2, derives the
 * initial facility mask/list and CPU model, initialises crypto and floating
 * interrupt state and, for non-ucontrol VMs, creates the guest address space.
 *
 * Returns 0 on success, -EINVAL for an unsupported @type, the error from
 * s390_enable_sie(), or -ENOMEM if any allocation fails.
 *
 * NOTE(review): every failure, including the ones before any allocation,
 * goes through out_err, which frees sie_page2, the debug area and the SCA.
 * This presumably relies on the caller handing in a zeroed struct kvm -
 * confirm.
 */
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
        gfp_t alloc_flags = GFP_KERNEL;
        int i, rc;
        char debug_name[16];
        /* shared by all VMs, advanced under kvm_lock below */
        static unsigned long sca_offset;

        rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
        if (type & ~KVM_VM_S390_UCONTROL)
                goto out_err;
        if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
                goto out_err;
#else
        if (type)
                goto out_err;
#endif

        rc = s390_enable_sie();
        if (rc)
                goto out_err;

        /* all remaining failures are allocation failures */
        rc = -ENOMEM;

        kvm->arch.use_esca = 0; /* start with basic SCA */
        if (!sclp.has_64bscao)
                alloc_flags |= GFP_DMA;
        rwlock_init(&kvm->arch.sca_lock);
        kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
        if (!kvm->arch.sca)
                goto out_err;
        /*
         * Place the basic SCA at a changing offset inside its page: the
         * offset advances by 16 bytes per VM and wraps to 0 once the block
         * would no longer fit into the page.
         */
        spin_lock(&kvm_lock);
        sca_offset += 16;
        if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
                sca_offset = 0;
        kvm->arch.sca = (struct bsca_block *)
                        ((char *) kvm->arch.sca + sca_offset);
        spin_unlock(&kvm_lock);

        /* the debug area is named after the creating process */
        sprintf(debug_name, "kvm-%u", current->pid);

        kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
        if (!kvm->arch.dbf)
                goto out_err;

        kvm->arch.sie_page2 =
             (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
        if (!kvm->arch.sie_page2)
                goto out_err;

        /* Populate the facility mask initially. */
        memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
               sizeof(S390_lowcore.stfle_fac_list));
        /* keep only host facilities that are also in the KVM mask */
        for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
                if (i < kvm_s390_fac_list_mask_size())
                        kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
                else
                        kvm->arch.model.fac_mask[i] = 0UL;
        }

        /* Populate the facility list initially. */
        kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
        memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
               S390_ARCH_FAC_LIST_SIZE_BYTE);

        /* we are always in czam mode - even on pre z14 machines */
        set_kvm_facility(kvm->arch.model.fac_mask, 138);
        set_kvm_facility(kvm->arch.model.fac_list, 138);
        /* we emulate STHYI in kvm */
        set_kvm_facility(kvm->arch.model.fac_mask, 74);
        set_kvm_facility(kvm->arch.model.fac_list, 74);
        if (MACHINE_HAS_TLB_GUEST) {
                set_kvm_facility(kvm->arch.model.fac_mask, 147);
                set_kvm_facility(kvm->arch.model.fac_list, 147);
        }

        kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
        kvm->arch.model.ibc = sclp.ibc & 0x0fff;

        kvm_s390_crypto_init(kvm);

        /* floating interrupt and ipte lock state */
        mutex_init(&kvm->arch.float_int.ais_lock);
        kvm->arch.float_int.simm = 0;
        kvm->arch.float_int.nimm = 0;
        spin_lock_init(&kvm->arch.float_int.lock);
        for (i = 0; i < FIRQ_LIST_COUNT; i++)
                INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
        init_waitqueue_head(&kvm->arch.ipte_wq);
        mutex_init(&kvm->arch.ipte_mutex);

        debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
        VM_EVENT(kvm, 3, "vm created with type %lu", type);

        if (type & KVM_VM_S390_UCONTROL) {
                /* ucontrol VMs get no VM-wide gmap and no memory limit */
                kvm->arch.gmap = NULL;
                kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
        } else {
                /* sclp.hamax + 1 would wrap for U64_MAX, so handle it first */
                if (sclp.hamax == U64_MAX)
                        kvm->arch.mem_limit = TASK_SIZE_MAX;
                else
                        kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
                                                    sclp.hamax + 1);
                kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
                if (!kvm->arch.gmap)
                        goto out_err;
                kvm->arch.gmap->private = kvm;
                kvm->arch.gmap->pfault_enabled = 0;
        }

        kvm->arch.css_support = 0;
        kvm->arch.use_irqchip = 0;
        kvm->arch.epoch = 0;

        spin_lock_init(&kvm->arch.start_stop_lock);
        kvm_s390_vsie_init(kvm);
        KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

        return 0;
out_err:
        /* common unwind for every failure above */
        free_page((unsigned long)kvm->arch.sie_page2);
        debug_unregister(kvm->arch.dbf);
        sca_dispose(kvm);
        KVM_EVENT(3, "creation of vm failed: %d", rc);
        return rc;
}
1989
/* This architecture provides no per-vcpu debugfs entries: always false. */
bool kvm_arch_has_vcpu_debugfs(void)
{
        return false;
}
1994
/* Nothing to create for @vcpu; unconditionally reports success (0). */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
        return 0;
}
1999
2000 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2001 {
2002         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2003         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2004         kvm_s390_clear_local_irqs(vcpu);
2005         kvm_clear_async_pf_completion_queue(vcpu);
2006         if (!kvm_is_ucontrol(vcpu->kvm))
2007                 sca_del_vcpu(vcpu);
2008
2009         if (kvm_is_ucontrol(vcpu->kvm))
2010                 gmap_remove(vcpu->arch.gmap);
2011
2012         if (vcpu->kvm->arch.use_cmma)
2013                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2014         free_page((unsigned long)(vcpu->arch.sie_block));
2015
2016         kvm_vcpu_uninit(vcpu);
2017         kmem_cache_free(kvm_vcpu_cache, vcpu);
2018 }
2019
2020 static void kvm_free_vcpus(struct kvm *kvm)
2021 {
2022         unsigned int i;
2023         struct kvm_vcpu *vcpu;
2024
2025         kvm_for_each_vcpu(i, vcpu, kvm)
2026                 kvm_arch_vcpu_destroy(vcpu);
2027
2028         mutex_lock(&kvm->lock);
2029         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2030                 kvm->vcpus[i] = NULL;
2031
2032         atomic_set(&kvm->online_vcpus, 0);
2033         mutex_unlock(&kvm->lock);
2034 }
2035
/*
 * Architecture specific part of VM destruction: frees all vcpus first, then
 * the SCA, the debug area, sie_page2, the VM-wide gmap (non-ucontrol VMs
 * only), adapters, floating interrupts, vsie state and, if present, the
 * migration state with its bitmap.
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
        kvm_free_vcpus(kvm);
        sca_dispose(kvm);
        debug_unregister(kvm->arch.dbf);
        free_page((unsigned long)kvm->arch.sie_page2);
        /* ucontrol VMs have no VM-wide gmap; theirs are per vcpu */
        if (!kvm_is_ucontrol(kvm))
                gmap_remove(kvm->arch.gmap);
        kvm_s390_destroy_adapters(kvm);
        kvm_s390_clear_float_irqs(kvm);
        kvm_s390_vsie_destroy(kvm);
        /* migration state only exists if it was allocated earlier */
        if (kvm->arch.migration_state) {
                vfree(kvm->arch.migration_state->pgste_bitmap);
                kfree(kvm->arch.migration_state);
        }
        KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
2053
2054 /* Section: vcpu related */
2055 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2056 {
2057         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2058         if (!vcpu->arch.gmap)
2059                 return -ENOMEM;
2060         vcpu->arch.gmap->private = vcpu->kvm;
2061
2062         return 0;
2063 }
2064
2065 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2066 {
2067         if (!kvm_s390_use_sca_entries())
2068                 return;
2069         read_lock(&vcpu->kvm->arch.sca_lock);
2070         if (vcpu->kvm->arch.use_esca) {
2071                 struct esca_block *sca = vcpu->kvm->arch.sca;
2072
2073                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2074                 sca->cpu[vcpu->vcpu_id].sda = 0;
2075         } else {
2076                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2077
2078                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2079                 sca->cpu[vcpu->vcpu_id].sda = 0;
2080         }
2081         read_unlock(&vcpu->kvm->arch.sca_lock);
2082 }
2083
2084 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2085 {
2086         if (!kvm_s390_use_sca_entries()) {
2087                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2088
2089                 /* we still need the basic sca for the ipte control */
2090                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2091                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2092         }
2093         read_lock(&vcpu->kvm->arch.sca_lock);
2094         if (vcpu->kvm->arch.use_esca) {
2095                 struct esca_block *sca = vcpu->kvm->arch.sca;
2096
2097                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2098                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2099                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2100                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2101                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2102         } else {
2103                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2104
2105                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2106                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2107                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2108                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2109         }
2110         read_unlock(&vcpu->kvm->arch.sca_lock);
2111 }
2112
2113 /* Basic SCA to Extended SCA data copy routines */
/*
 * Copy one basic SCA entry @s into the extended SCA entry @d: the sda field
 * and the c and scn fields of sigp_ctrl.
 */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
        d->sda = s->sda;
        d->sigp_ctrl.c = s->sigp_ctrl.c;
        d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}
2120
2121 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2122 {
2123         int i;
2124
2125         d->ipte_control = s->ipte_control;
2126         d->mcn[0] = s->mcn;
2127         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2128                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2129 }
2130
2131 static int sca_switch_to_extended(struct kvm *kvm)
2132 {
2133         struct bsca_block *old_sca = kvm->arch.sca;
2134         struct esca_block *new_sca;
2135         struct kvm_vcpu *vcpu;
2136         unsigned int vcpu_idx;
2137         u32 scaol, scaoh;
2138
2139         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2140         if (!new_sca)
2141                 return -ENOMEM;
2142
2143         scaoh = (u32)((u64)(new_sca) >> 32);
2144         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2145
2146         kvm_s390_vcpu_block_all(kvm);
2147         write_lock(&kvm->arch.sca_lock);
2148
2149         sca_copy_b_to_e(new_sca, old_sca);
2150
2151         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2152                 vcpu->arch.sie_block->scaoh = scaoh;
2153                 vcpu->arch.sie_block->scaol = scaol;
2154                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2155         }
2156         kvm->arch.sca = new_sca;
2157         kvm->arch.use_esca = 1;
2158
2159         write_unlock(&kvm->arch.sca_lock);
2160         kvm_s390_vcpu_unblock_all(kvm);
2161
2162         free_page((unsigned long)old_sca);
2163
2164         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2165                  old_sca, kvm->arch.sca);
2166         return 0;
2167 }
2168
2169 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2170 {
2171         int rc;
2172
2173         if (!kvm_s390_use_sca_entries()) {
2174                 if (id < KVM_MAX_VCPUS)
2175                         return true;
2176                 return false;
2177         }
2178         if (id < KVM_S390_BSCA_CPU_SLOTS)
2179                 return true;
2180         if (!sclp.has_esca || !sclp.has_64bscao)
2181                 return false;
2182
2183         mutex_lock(&kvm->lock);
2184         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2185         mutex_unlock(&kvm->lock);
2186
2187         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2188 }
2189
2190 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2191 {
2192         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2193         kvm_clear_async_pf_completion_queue(vcpu);
2194         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2195                                     KVM_SYNC_GPRS |
2196                                     KVM_SYNC_ACRS |
2197                                     KVM_SYNC_CRS |
2198                                     KVM_SYNC_ARCH0 |
2199                                     KVM_SYNC_PFAULT;
2200         kvm_s390_set_prefix(vcpu, 0);
2201         if (test_kvm_facility(vcpu->kvm, 64))
2202                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2203         if (test_kvm_facility(vcpu->kvm, 133))
2204                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2205         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2206          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2207          */
2208         if (MACHINE_HAS_VX)
2209                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2210         else
2211                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2212
2213         if (kvm_is_ucontrol(vcpu->kvm))
2214                 return __kvm_ucontrol_vcpu_init(vcpu);
2215
2216         return 0;
2217 }
2218
/*
 * Record the current TOD clock value as the start of a CPU timer accounting
 * period, inside a cputm_seqcount write section. Warns once if a period is
 * already running (cputm_start non-zero).
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
        vcpu->arch.cputm_start = get_tod_clock_fast();
        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
2227
/*
 * End the current CPU timer accounting period: subtract the TOD time elapsed
 * since cputm_start from the SIE block's cputm and reset cputm_start to 0,
 * inside a cputm_seqcount write section. Warns once if no period is running.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
        vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
        vcpu->arch.cputm_start = 0;
        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
2237
/*
 * Mark CPU timer accounting as enabled for @vcpu and start an accounting
 * period. Warns once if accounting is already enabled.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        WARN_ON_ONCE(vcpu->arch.cputm_enabled);
        vcpu->arch.cputm_enabled = true;
        __start_cpu_timer_accounting(vcpu);
}
2245
/*
 * Stop the running accounting period and mark CPU timer accounting as
 * disabled for @vcpu. Warns once if accounting is not enabled.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
        __stop_cpu_timer_accounting(vcpu);
        vcpu->arch.cputm_enabled = false;
}
2253
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
        __enable_cpu_timer_accounting(vcpu);
        preempt_enable();
}
2260
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
        __disable_cpu_timer_accounting(vcpu);
        preempt_enable();
}
2267
/*
 * set the cpu timer - may only be called from the VCPU thread itself
 *
 * Stores @cputm in the SIE block inside a cputm_seqcount write section. If
 * accounting is enabled, the accounting period is restarted from the current
 * TOD clock value so that the new timer value is not reduced by time that
 * elapsed before it was set.
 */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
        raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
        if (vcpu->arch.cputm_enabled)
                vcpu->arch.cputm_start = get_tod_clock_fast();
        vcpu->arch.sie_block->cputm = cputm;
        raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
        preempt_enable();
}
2279
/*
 * update and get the cpu timer - can also be called from other VCPU threads
 *
 * With accounting disabled the stored SIE block value is returned as is.
 * Otherwise the stored value, reduced by the TOD time elapsed since
 * cputm_start, is computed in a cputm_seqcount read loop that repeats until
 * no writer interfered.
 */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
        unsigned int seq;
        __u64 value;

        if (unlikely(!vcpu->arch.cputm_enabled))
                return vcpu->arch.sie_block->cputm;

        preempt_disable(); /* protect from TOD sync and vcpu_load/put */
        do {
                seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
                /*
                 * If the writer would ever execute a read in the critical
                 * section, e.g. in irq context, we have a deadlock.
                 */
                WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
                value = vcpu->arch.sie_block->cputm;
                /* if cputm_start is 0, accounting is being started/stopped */
                if (likely(vcpu->arch.cputm_start))
                        value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
                /*
                 * The retry check is given seq with its low bit cleared, so
                 * a sequence value that was odd when sampled can never match
                 * and the loop runs again.
                 */
        } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
        preempt_enable();
        return value;
}
2305
/*
 * Called when @vcpu is loaded onto host CPU @cpu: re-enables the gmap that
 * was active when the vcpu was last put, sets CPUSTAT_RUNNING, resumes CPU
 * timer accounting (if enabled and the vcpu is not idle) and records @cpu.
 */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{

        gmap_enable(vcpu->arch.enabled_gmap);
        atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
                __start_cpu_timer_accounting(vcpu);
        vcpu->cpu = cpu;
}
2315
/*
 * Counterpart of kvm_arch_vcpu_load(): marks the vcpu as not loaded on any
 * CPU (-1), pauses CPU timer accounting (if enabled and the vcpu is not
 * idle), clears CPUSTAT_RUNNING and remembers and disables the currently
 * enabled gmap so it can be restored on the next load.
 */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
        vcpu->cpu = -1;
        if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
                __stop_cpu_timer_accounting(vcpu);
        atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
        vcpu->arch.enabled_gmap = gmap_get_enabled();
        gmap_disable(vcpu->arch.enabled_gmap);

}
2326
/*
 * Put @vcpu into its initial reset state: PSW, prefix, CPU timer, clock
 * comparator, TOD programmable register and control registers are reset,
 * the floating point control is cleared, pending async page faults and
 * local interrupts are dropped, and the vcpu is stopped unless user space
 * controls the cpu state.
 */
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
        /* this equals initial cpu reset in pop, but we don't switch to ESA */
        vcpu->arch.sie_block->gpsw.mask = 0UL;
        vcpu->arch.sie_block->gpsw.addr = 0UL;
        kvm_s390_set_prefix(vcpu, 0);
        kvm_s390_set_cpu_timer(vcpu, 0);
        vcpu->arch.sie_block->ckc       = 0UL;
        vcpu->arch.sie_block->todpr     = 0;
        /* clear all 16 control registers, then set the two non-zero defaults */
        memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
        vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
        vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
        /* make sure the new fpc will be lazily loaded */
        save_fpu_regs();
        current->thread.fpu.fpc = 0;
        vcpu->arch.sie_block->gbea = 1;
        vcpu->arch.sie_block->pp = 0;
        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
        kvm_clear_async_pf_completion_queue(vcpu);
        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
                kvm_s390_vcpu_stop(vcpu);
        kvm_s390_clear_local_irqs(vcpu);
}
2350
2351 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2352 {
2353         mutex_lock(&vcpu->kvm->lock);
2354         preempt_disable();
2355         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2356         preempt_enable();
2357         mutex_unlock(&vcpu->kvm->lock);
2358         if (!kvm_is_ucontrol(vcpu->kvm)) {
2359                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2360                 sca_add_vcpu(vcpu);
2361         }
2362         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2363                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2364         /* make vcpu_load load the right gmap on the first trigger */
2365         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2366 }
2367
2368 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2369 {
2370         if (!test_kvm_facility(vcpu->kvm, 76))
2371                 return;
2372
2373         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2374
2375         if (vcpu->kvm->arch.crypto.aes_kw)
2376                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2377         if (vcpu->kvm->arch.crypto.dea_kw)
2378                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2379
2380         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2381 }
2382
/*
 * Free the page referenced by the SIE block's cbrlo field (allocated in
 * kvm_s390_vcpu_setup_cmma()) and clear the field.
 */
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
        free_page(vcpu->arch.sie_block->cbrlo);
        vcpu->arch.sie_block->cbrlo = 0;
}
2388
2389 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2390 {
2391         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2392         if (!vcpu->arch.sie_block->cbrlo)
2393                 return -ENOMEM;
2394
2395         vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2396         return 0;
2397 }
2398
2399 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2400 {
2401         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2402
2403         vcpu->arch.sie_block->ibc = model->ibc;
2404         if (test_kvm_facility(vcpu->kvm, 7))
2405                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2406 }
2407
/*
 * Configure the SIE control block of a newly created vcpu.
 *
 * The vcpu starts in the stopped state. Execution controls (cpuflags, ecb,
 * ecb2, eca, ecd, ictl) are derived from the facilities offered to the
 * guest and from host (sclp/machine) capabilities; the CPU model, CMMA
 * state, the clock comparator timer and the crypto controls are set up as
 * well.
 *
 * Returns 0 on success or the error from kvm_s390_vcpu_setup_cmma().
 */
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
        int rc = 0;

        atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
                                                    CPUSTAT_SM |
                                                    CPUSTAT_STOPPED);

        /* facility 78 takes precedence over facility 8 here */
        if (test_kvm_facility(vcpu->kvm, 78))
                atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
        else if (test_kvm_facility(vcpu->kvm, 8))
                atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

        kvm_s390_vcpu_setup_model(vcpu);

        /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
        if (MACHINE_HAS_ESOP)
                vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
        if (test_kvm_facility(vcpu->kvm, 9))
                vcpu->arch.sie_block->ecb |= ECB_SRSI;
        if (test_kvm_facility(vcpu->kvm, 73))
                vcpu->arch.sie_block->ecb |= ECB_TE;

        if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
                vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
        if (test_kvm_facility(vcpu->kvm, 130))
                vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
        /* eca is assigned, not or-ed: these two bits are always set */
        vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
        if (sclp.has_cei)
                vcpu->arch.sie_block->eca |= ECA_CEI;
        if (sclp.has_ib)
                vcpu->arch.sie_block->eca |= ECA_IB;
        if (sclp.has_siif)
                vcpu->arch.sie_block->eca |= ECA_SII;
        if (sclp.has_sigpif)
                vcpu->arch.sie_block->eca |= ECA_SIGPI;
        if (test_kvm_facility(vcpu->kvm, 129)) {
                vcpu->arch.sie_block->eca |= ECA_VX;
                vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
        }
        if (test_kvm_facility(vcpu->kvm, 139))
                vcpu->arch.sie_block->ecd |= ECD_MEF;

        /* both areas live in the kvm_run sync register block */
        vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
                                        | SDNXC;
        vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;

        /* without KSS, intercept the storage key instructions instead */
        if (sclp.has_kss)
                atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
        else
                vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

        if (vcpu->kvm->arch.use_cmma) {
                rc = kvm_s390_vcpu_setup_cmma(vcpu);
                if (rc)
                        return rc;
        }
        hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

        kvm_s390_vcpu_crypto_setup(vcpu);

        return rc;
}
2472
2473 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2474                                       unsigned int id)
2475 {
2476         struct kvm_vcpu *vcpu;
2477         struct sie_page *sie_page;
2478         int rc = -EINVAL;
2479
2480         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2481                 goto out;
2482
2483         rc = -ENOMEM;
2484
2485         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2486         if (!vcpu)
2487                 goto out;
2488
2489         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2490         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2491         if (!sie_page)
2492                 goto out_free_cpu;
2493
2494         vcpu->arch.sie_block = &sie_page->sie_block;
2495         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2496
2497         /* the real guest size will always be smaller than msl */
2498         vcpu->arch.sie_block->mso = 0;
2499         vcpu->arch.sie_block->msl = sclp.hamax;
2500
2501         vcpu->arch.sie_block->icpua = id;
2502         spin_lock_init(&vcpu->arch.local_int.lock);
2503         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2504         vcpu->arch.local_int.wq = &vcpu->wq;
2505         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2506         seqcount_init(&vcpu->arch.cputm_seqcount);
2507
2508         rc = kvm_vcpu_init(vcpu, kvm, id);
2509         if (rc)
2510                 goto out_free_sie_block;
2511         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2512                  vcpu->arch.sie_block);
2513         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2514
2515         return vcpu;
2516 out_free_sie_block:
2517         free_page((unsigned long)(vcpu->arch.sie_block));
2518 out_free_cpu:
2519         kmem_cache_free(kvm_vcpu_cache, vcpu);
2520 out:
2521         return ERR_PTR(rc);
2522 }
2523
/* Report the result of kvm_s390_vcpu_has_irq() for this vcpu. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

	return has_irq;
}
2528
2529 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2530 {
2531         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2532 }
2533
2534 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2535 {
2536         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2537         exit_sie(vcpu);
2538 }
2539
2540 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2541 {
2542         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2543 }
2544
2545 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2546 {
2547         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2548         exit_sie(vcpu);
2549 }
2550
2551 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2552 {
2553         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2554 }
2555
2556 /*
2557  * Kick a guest cpu out of SIE and wait until SIE is not running.
2558  * If the CPU is not running (e.g. waiting as idle) the function will
2559  * return immediately. */
2560 void exit_sie(struct kvm_vcpu *vcpu)
2561 {
2562         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2563         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2564                 cpu_relax();
2565 }
2566
/*
 * Post request @req for @vcpu and kick the vcpu out of SIE so that the
 * request is processed synchronously.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	/* the request has to be posted before the vcpu is kicked */
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
2573
2574 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2575                               unsigned long end)
2576 {
2577         struct kvm *kvm = gmap->private;
2578         struct kvm_vcpu *vcpu;
2579         unsigned long prefix;
2580         int i;
2581
2582         if (gmap_is_shadow(gmap))
2583                 return;
2584         if (start >= 1UL << 31)
2585                 /* We are only interested in prefix pages */
2586                 return;
2587         kvm_for_each_vcpu(i, vcpu, kvm) {
2588                 /* match against both prefix pages */
2589                 prefix = kvm_s390_get_prefix(vcpu);
2590                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2591                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2592                                    start, end);
2593                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2594                 }
2595         }
2596 }
2597
/*
 * kvm common code refers to this symbol but never calls it, so reaching
 * this function is treated as a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();

	return 0;
}
2604
2605 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2606                                            struct kvm_one_reg *reg)
2607 {
2608         int r = -EINVAL;
2609
2610         switch (reg->id) {
2611         case KVM_REG_S390_TODPR:
2612                 r = put_user(vcpu->arch.sie_block->todpr,
2613                              (u32 __user *)reg->addr);
2614                 break;
2615         case KVM_REG_S390_EPOCHDIFF:
2616                 r = put_user(vcpu->arch.sie_block->epoch,
2617                              (u64 __user *)reg->addr);
2618                 break;
2619         case KVM_REG_S390_CPU_TIMER:
2620                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2621                              (u64 __user *)reg->addr);
2622                 break;
2623         case KVM_REG_S390_CLOCK_COMP:
2624                 r = put_user(vcpu->arch.sie_block->ckc,
2625                              (u64 __user *)reg->addr);
2626                 break;
2627         case KVM_REG_S390_PFTOKEN:
2628                 r = put_user(vcpu->arch.pfault_token,
2629                              (u64 __user *)reg->addr);
2630                 break;
2631         case KVM_REG_S390_PFCOMPARE:
2632                 r = put_user(vcpu->arch.pfault_compare,
2633                              (u64 __user *)reg->addr);
2634                 break;
2635         case KVM_REG_S390_PFSELECT:
2636                 r = put_user(vcpu->arch.pfault_select,
2637                              (u64 __user *)reg->addr);
2638                 break;
2639         case KVM_REG_S390_PP:
2640                 r = put_user(vcpu->arch.sie_block->pp,
2641                              (u64 __user *)reg->addr);
2642                 break;
2643         case KVM_REG_S390_GBEA:
2644                 r = put_user(vcpu->arch.sie_block->gbea,
2645                              (u64 __user *)reg->addr);
2646                 break;
2647         default:
2648                 break;
2649         }
2650
2651         return r;
2652 }
2653
2654 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2655                                            struct kvm_one_reg *reg)
2656 {
2657         int r = -EINVAL;
2658         __u64 val;
2659
2660         switch (reg->id) {
2661         case KVM_REG_S390_TODPR:
2662                 r = get_user(vcpu->arch.sie_block->todpr,
2663                              (u32 __user *)reg->addr);
2664                 break;
2665         case KVM_REG_S390_EPOCHDIFF:
2666                 r = get_user(vcpu->arch.sie_block->epoch,
2667                              (u64 __user *)reg->addr);
2668                 break;
2669         case KVM_REG_S390_CPU_TIMER:
2670                 r = get_user(val, (u64 __user *)reg->addr);
2671                 if (!r)
2672                         kvm_s390_set_cpu_timer(vcpu, val);
2673                 break;
2674         case KVM_REG_S390_CLOCK_COMP:
2675                 r = get_user(vcpu->arch.sie_block->ckc,
2676                              (u64 __user *)reg->addr);
2677                 break;
2678         case KVM_REG_S390_PFTOKEN:
2679                 r = get_user(vcpu->arch.pfault_token,
2680                              (u64 __user *)reg->addr);
2681                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2682                         kvm_clear_async_pf_completion_queue(vcpu);
2683                 break;
2684         case KVM_REG_S390_PFCOMPARE:
2685                 r = get_user(vcpu->arch.pfault_compare,
2686                              (u64 __user *)reg->addr);
2687                 break;
2688         case KVM_REG_S390_PFSELECT:
2689                 r = get_user(vcpu->arch.pfault_select,
2690                              (u64 __user *)reg->addr);
2691                 break;
2692         case KVM_REG_S390_PP:
2693                 r = get_user(vcpu->arch.sie_block->pp,
2694                              (u64 __user *)reg->addr);
2695                 break;
2696         case KVM_REG_S390_GBEA:
2697                 r = get_user(vcpu->arch.sie_block->gbea,
2698                              (u64 __user *)reg->addr);
2699                 break;
2700         default:
2701                 break;
2702         }
2703
2704         return r;
2705 }
2706
/* Run kvm_s390_vcpu_initial_reset() on the vcpu; always returns 0. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);

	return 0;
}
2712
2713 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2714 {
2715         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2716         return 0;
2717 }
2718
2719 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2720 {
2721         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2722         return 0;
2723 }
2724
2725 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2726                                   struct kvm_sregs *sregs)
2727 {
2728         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2729         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2730         return 0;
2731 }
2732
2733 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2734                                   struct kvm_sregs *sregs)
2735 {
2736         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2737         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2738         return 0;
2739 }
2740
2741 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2742 {
2743         if (test_fp_ctl(fpu->fpc))
2744                 return -EINVAL;
2745         vcpu->run->s.regs.fpc = fpu->fpc;
2746         if (MACHINE_HAS_VX)
2747                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2748                                  (freg_t *) fpu->fprs);
2749         else
2750                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2751         return 0;
2752 }
2753
2754 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2755 {
2756         /* make sure we have the latest values */
2757         save_fpu_regs();
2758         if (MACHINE_HAS_VX)
2759                 convert_vx_to_fp((freg_t *) fpu->fprs,
2760                                  (__vector128 *) vcpu->run->s.regs.vrs);
2761         else
2762                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2763         fpu->fpc = vcpu->run->s.regs.fpc;
2764         return 0;
2765 }
2766
2767 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2768 {
2769         int rc = 0;
2770
2771         if (!is_vcpu_stopped(vcpu))
2772                 rc = -EBUSY;
2773         else {
2774                 vcpu->run->psw_mask = psw.mask;
2775                 vcpu->run->psw_addr = psw.addr;
2776         }
2777         return rc;
2778 }
2779
2780 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2781                                   struct kvm_translation *tr)
2782 {
2783         return -EINVAL; /* not implemented yet */
2784 }
2785
/* All bits that may be set in the control field of struct kvm_guest_debug */
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_ENABLE | \
			      KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP)
2789
2790 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2791                                         struct kvm_guest_debug *dbg)
2792 {
2793         int rc = 0;
2794
2795         vcpu->guest_debug = 0;
2796         kvm_s390_clear_bp_data(vcpu);
2797
2798         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2799                 return -EINVAL;
2800         if (!sclp.has_gpere)
2801                 return -EINVAL;
2802
2803         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2804                 vcpu->guest_debug = dbg->control;
2805                 /* enforce guest PER */
2806                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2807
2808                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2809                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2810         } else {
2811                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2812                 vcpu->arch.guestdbg.last_bp = 0;
2813         }
2814
2815         if (rc) {
2816                 vcpu->guest_debug = 0;
2817                 kvm_s390_clear_bp_data(vcpu);
2818                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2819         }
2820
2821         return rc;
2822 }
2823
2824 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2825                                     struct kvm_mp_state *mp_state)
2826 {
2827         /* CHECK_STOP and LOAD are not supported yet */
2828         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2829                                        KVM_MP_STATE_OPERATING;
2830 }
2831
2832 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2833                                     struct kvm_mp_state *mp_state)
2834 {
2835         int rc = 0;
2836
2837         /* user space knows about this interface - let it control the state */
2838         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2839
2840         switch (mp_state->mp_state) {
2841         case KVM_MP_STATE_STOPPED:
2842                 kvm_s390_vcpu_stop(vcpu);
2843                 break;
2844         case KVM_MP_STATE_OPERATING:
2845                 kvm_s390_vcpu_start(vcpu);
2846                 break;
2847         case KVM_MP_STATE_LOAD:
2848         case KVM_MP_STATE_CHECK_STOP:
2849                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2850         default:
2851                 rc = -ENXIO;
2852         }
2853
2854         return rc;
2855 }
2856
2857 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2858 {
2859         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2860 }
2861
2862 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2863 {
2864 retry:
2865         kvm_s390_vcpu_request_handled(vcpu);
2866         if (!kvm_request_pending(vcpu))
2867                 return 0;
2868         /*
2869          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2870          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2871          * This ensures that the ipte instruction for this request has
2872          * already finished. We might race against a second unmapper that
2873          * wants to set the blocking bit. Lets just retry the request loop.
2874          */
2875         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2876                 int rc;
2877                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2878                                           kvm_s390_get_prefix(vcpu),
2879                                           PAGE_SIZE * 2, PROT_WRITE);
2880                 if (rc) {
2881                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2882                         return rc;
2883                 }
2884                 goto retry;
2885         }
2886
2887         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2888                 vcpu->arch.sie_block->ihcpu = 0xffff;
2889                 goto retry;
2890         }
2891
2892         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2893                 if (!ibs_enabled(vcpu)) {
2894                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2895                         atomic_or(CPUSTAT_IBS,
2896                                         &vcpu->arch.sie_block->cpuflags);
2897                 }
2898                 goto retry;
2899         }
2900
2901         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2902                 if (ibs_enabled(vcpu)) {
2903                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2904                         atomic_andnot(CPUSTAT_IBS,
2905                                           &vcpu->arch.sie_block->cpuflags);
2906                 }
2907                 goto retry;
2908         }
2909
2910         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2911                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2912                 goto retry;
2913         }
2914
2915         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2916                 /*
2917                  * Disable CMMA virtualization; we will emulate the ESSA
2918                  * instruction manually, in order to provide additional
2919                  * functionalities needed for live migration.
2920                  */
2921                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2922                 goto retry;
2923         }
2924
2925         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2926                 /*
2927                  * Re-enable CMMA virtualization if CMMA is available and
2928                  * was used.
2929                  */
2930                 if ((vcpu->kvm->arch.use_cmma) &&
2931                     (vcpu->kvm->mm->context.use_cmma))
2932                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2933                 goto retry;
2934         }
2935
2936         /* nothing to do, just clear the request */
2937         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2938
2939         return 0;
2940 }
2941
2942 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2943                                  const struct kvm_s390_vm_tod_clock *gtod)
2944 {
2945         struct kvm_vcpu *vcpu;
2946         struct kvm_s390_tod_clock_ext htod;
2947         int i;
2948
2949         mutex_lock(&kvm->lock);
2950         preempt_disable();
2951
2952         get_tod_clock_ext((char *)&htod);
2953
2954         kvm->arch.epoch = gtod->tod - htod.tod;
2955         kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2956
2957         if (kvm->arch.epoch > gtod->tod)
2958                 kvm->arch.epdx -= 1;
2959
2960         kvm_s390_vcpu_block_all(kvm);
2961         kvm_for_each_vcpu(i, vcpu, kvm) {
2962                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2963                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
2964         }
2965
2966         kvm_s390_vcpu_unblock_all(kvm);
2967         preempt_enable();
2968         mutex_unlock(&kvm->lock);
2969 }
2970
2971 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2972 {
2973         struct kvm_vcpu *vcpu;
2974         int i;
2975
2976         mutex_lock(&kvm->lock);
2977         preempt_disable();
2978         kvm->arch.epoch = tod - get_tod_clock();
2979         kvm_s390_vcpu_block_all(kvm);
2980         kvm_for_each_vcpu(i, vcpu, kvm)
2981                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2982         kvm_s390_vcpu_unblock_all(kvm);
2983         preempt_enable();
2984         mutex_unlock(&kvm->lock);
2985 }
2986
2987 /**
2988  * kvm_arch_fault_in_page - fault-in guest page if necessary
2989  * @vcpu: The corresponding virtual cpu
2990  * @gpa: Guest physical address
2991  * @writable: Whether the page should be writable or not
2992  *
2993  * Make sure that a guest page has been faulted-in on the host.
2994  *
2995  * Return: Zero on success, negative error code otherwise.
2996  */
2997 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2998 {
2999         return gmap_fault(vcpu->arch.gmap, gpa,
3000                           writable ? FAULT_FLAG_WRITE : 0);
3001 }
3002
3003 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3004                                       unsigned long token)
3005 {
3006         struct kvm_s390_interrupt inti;
3007         struct kvm_s390_irq irq;
3008
3009         if (start_token) {
3010                 irq.u.ext.ext_params2 = token;
3011                 irq.type = KVM_S390_INT_PFAULT_INIT;
3012                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3013         } else {
3014                 inti.type = KVM_S390_INT_PFAULT_DONE;
3015                 inti.parm64 = token;
3016                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3017         }
3018 }
3019
3020 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3021                                      struct kvm_async_pf *work)
3022 {
3023         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3024         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3025 }
3026
3027 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3028                                  struct kvm_async_pf *work)
3029 {
3030         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3031         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3032 }
3033
/*
 * Intentionally empty: s390 will always inject the page directly, so there
 * is nothing left to do here.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
3039
3040 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3041 {
3042         /*
3043          * s390 will always inject the page directly,
3044          * but we still want check_async_completion to cleanup
3045          */
3046         return true;
3047 }
3048
3049 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3050 {
3051         hva_t hva;
3052         struct kvm_arch_async_pf arch;
3053         int rc;
3054
3055         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3056                 return 0;
3057         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3058             vcpu->arch.pfault_compare)
3059                 return 0;
3060         if (psw_extint_disabled(vcpu))
3061                 return 0;
3062         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3063                 return 0;
3064         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3065                 return 0;
3066         if (!vcpu->arch.gmap->pfault_enabled)
3067                 return 0;
3068
3069         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3070         hva += current->thread.gmap_addr & ~PAGE_MASK;
3071         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3072                 return 0;
3073
3074         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3075         return rc;
3076 }
3077
3078 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3079 {
3080         int rc, cpuflags;
3081
3082         /*
3083          * On s390 notifications for arriving pages will be delivered directly
3084          * to the guest but the house keeping for completed pfaults is
3085          * handled outside the worker.
3086          */
3087         kvm_check_async_pf_completion(vcpu);
3088
3089         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3090         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3091
3092         if (need_resched())
3093                 schedule();
3094
3095         if (test_cpu_flag(CIF_MCCK_PENDING))
3096                 s390_handle_mcck();
3097
3098         if (!kvm_is_ucontrol(vcpu->kvm)) {
3099                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3100                 if (rc)
3101                         return rc;
3102         }
3103
3104         rc = kvm_s390_handle_requests(vcpu);
3105         if (rc)
3106                 return rc;
3107
3108         if (guestdbg_enabled(vcpu)) {
3109                 kvm_s390_backup_guest_per_regs(vcpu);
3110                 kvm_s390_patch_guest_per_regs(vcpu);
3111         }
3112
3113         vcpu->arch.sie_block->icptcode = 0;
3114         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3115         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3116         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3117
3118         return 0;
3119 }
3120
3121 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3122 {
3123         struct kvm_s390_pgm_info pgm_info = {
3124                 .code = PGM_ADDRESSING,
3125         };
3126         u8 opcode, ilen;
3127         int rc;
3128
3129         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3130         trace_kvm_s390_sie_fault(vcpu);
3131
3132         /*
3133          * We want to inject an addressing exception, which is defined as a
3134          * suppressing or terminating exception. However, since we came here
3135          * by a DAT access exception, the PSW still points to the faulting
3136          * instruction since DAT exceptions are nullifying. So we've got
3137          * to look up the current opcode to get the length of the instruction
3138          * to be able to forward the PSW.
3139          */
3140         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3141         ilen = insn_length(opcode);
3142         if (rc < 0) {
3143                 return rc;
3144         } else if (rc) {
3145                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3146                  * Forward by arbitrary ilc, injection will take care of
3147                  * nullification if necessary.
3148                  */
3149                 pgm_info = vcpu->arch.pgm;
3150                 ilen = 4;
3151         }
3152         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3153         kvm_s390_forward_psw(vcpu, ilen);
3154         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3155 }
3156
3157 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3158 {
3159         struct mcck_volatile_info *mcck_info;
3160         struct sie_page *sie_page;
3161
3162         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3163                    vcpu->arch.sie_block->icptcode);
3164         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3165
3166         if (guestdbg_enabled(vcpu))
3167                 kvm_s390_restore_guest_per_regs(vcpu);
3168
3169         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3170         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3171
3172         if (exit_reason == -EINTR) {
3173                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3174                 sie_page = container_of(vcpu->arch.sie_block,
3175                                         struct sie_page, sie_block);
3176                 mcck_info = &sie_page->mcck_info;
3177                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3178                 return 0;
3179         }
3180
3181         if (vcpu->arch.sie_block->icptcode > 0) {
3182                 int rc = kvm_handle_sie_intercept(vcpu);
3183
3184                 if (rc != -EOPNOTSUPP)
3185                         return rc;
3186                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3187                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3188                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3189                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3190                 return -EREMOTE;
3191         } else if (exit_reason != -EFAULT) {
3192                 vcpu->stat.exit_null++;
3193                 return 0;
3194         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3195                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3196                 vcpu->run->s390_ucontrol.trans_exc_code =
3197                                                 current->thread.gmap_addr;
3198                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3199                 return -EREMOTE;
3200         } else if (current->thread.gmap_pfault) {
3201                 trace_kvm_s390_major_guest_pfault(vcpu);
3202                 current->thread.gmap_pfault = 0;
3203                 if (kvm_arch_setup_async_pf(vcpu))
3204                         return 0;
3205                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3206         }
3207         return vcpu_post_run_fault_in_sie(vcpu);
3208 }
3209
3210 static int __vcpu_run(struct kvm_vcpu *vcpu)
3211 {
3212         int rc, exit_reason;
3213
3214         /*
3215          * We try to hold kvm->srcu during most of vcpu_run (except when run-
3216          * ning the guest), so that memslots (and other stuff) are protected
3217          */
3218         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3219
3220         do {
3221                 rc = vcpu_pre_run(vcpu);
3222                 if (rc)
3223                         break;
3224
3225                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3226                 /*
3227                  * As PF_VCPU will be used in fault handler, between
3228                  * guest_enter and guest_exit should be no uaccess.
3229                  */
3230                 local_irq_disable();
3231                 guest_enter_irqoff();
3232                 __disable_cpu_timer_accounting(vcpu);
3233                 local_irq_enable();
3234                 exit_reason = sie64a(vcpu->arch.sie_block,
3235                                      vcpu->run->s.regs.gprs);
3236                 local_irq_disable();
3237                 __enable_cpu_timer_accounting(vcpu);
3238                 guest_exit_irqoff();
3239                 local_irq_enable();
3240                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3241
3242                 rc = vcpu_post_run(vcpu, exit_reason);
3243         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3244
3245         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3246         return rc;
3247 }
3248
3249 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3250 {
3251         struct runtime_instr_cb *riccb;
3252         struct gs_cb *gscb;
3253
3254         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3255         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3256         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3257         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3258         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3259                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3260         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3261                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3262                 /* some control register changes require a tlb flush */
3263                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3264         }
3265         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3266                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3267                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3268                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3269                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3270                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3271         }
3272         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3273                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3274                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3275                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3276                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3277                         kvm_clear_async_pf_completion_queue(vcpu);
3278         }
3279         /*
3280          * If userspace sets the riccb (e.g. after migration) to a valid state,
3281          * we should enable RI here instead of doing the lazy enablement.
3282          */
3283         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3284             test_kvm_facility(vcpu->kvm, 64) &&
3285             riccb->v &&
3286             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3287                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3288                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3289         }
3290         /*
3291          * If userspace sets the gscb (e.g. after migration) to non-zero,
3292          * we should enable GS here instead of doing the lazy enablement.
3293          */
3294         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3295             test_kvm_facility(vcpu->kvm, 133) &&
3296             gscb->gssm &&
3297             !vcpu->arch.gs_enabled) {
3298                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3299                 vcpu->arch.sie_block->ecb |= ECB_GS;
3300                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3301                 vcpu->arch.gs_enabled = 1;
3302         }
3303         save_access_regs(vcpu->arch.host_acrs);
3304         restore_access_regs(vcpu->run->s.regs.acrs);
3305         /* save host (userspace) fprs/vrs */
3306         save_fpu_regs();
3307         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3308         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3309         if (MACHINE_HAS_VX)
3310                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3311         else
3312                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3313         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3314         if (test_fp_ctl(current->thread.fpu.fpc))
3315                 /* User space provided an invalid FPC, let's clear it */
3316                 current->thread.fpu.fpc = 0;
3317         if (MACHINE_HAS_GS) {
3318                 preempt_disable();
3319                 __ctl_set_bit(2, 4);
3320                 if (current->thread.gs_cb) {
3321                         vcpu->arch.host_gscb = current->thread.gs_cb;
3322                         save_gs_cb(vcpu->arch.host_gscb);
3323                 }
3324                 if (vcpu->arch.gs_enabled) {
3325                         current->thread.gs_cb = (struct gs_cb *)
3326                                                 &vcpu->run->s.regs.gscb;
3327                         restore_gs_cb(current->thread.gs_cb);
3328                 }
3329                 preempt_enable();
3330         }
3331
3332         kvm_run->kvm_dirty_regs = 0;
3333 }
3334
3335 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3336 {
3337         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3338         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3339         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3340         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3341         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3342         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3343         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3344         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3345         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3346         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3347         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3348         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3349         save_access_regs(vcpu->run->s.regs.acrs);
3350         restore_access_regs(vcpu->arch.host_acrs);
3351         /* Save guest register state */
3352         save_fpu_regs();
3353         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3354         /* Restore will be done lazily at return */
3355         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3356         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3357         if (MACHINE_HAS_GS) {
3358                 __ctl_set_bit(2, 4);
3359                 if (vcpu->arch.gs_enabled)
3360                         save_gs_cb(current->thread.gs_cb);
3361                 preempt_disable();
3362                 current->thread.gs_cb = vcpu->arch.host_gscb;
3363                 restore_gs_cb(vcpu->arch.host_gscb);
3364                 preempt_enable();
3365                 if (!vcpu->arch.host_gscb)
3366                         __ctl_clear_bit(2, 4);
3367                 vcpu->arch.host_gscb = NULL;
3368         }
3369
3370 }
3371
3372 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3373 {
3374         int rc;
3375         sigset_t sigsaved;
3376
3377         if (kvm_run->immediate_exit)
3378                 return -EINTR;
3379
3380         if (guestdbg_exit_pending(vcpu)) {
3381                 kvm_s390_prepare_debug_exit(vcpu);
3382                 return 0;
3383         }
3384
3385         if (vcpu->sigset_active)
3386                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3387
3388         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3389                 kvm_s390_vcpu_start(vcpu);
3390         } else if (is_vcpu_stopped(vcpu)) {
3391                 pr_err_ratelimited("can't run stopped vcpu %d\n",
3392                                    vcpu->vcpu_id);
3393                 return -EINVAL;
3394         }
3395
3396         sync_regs(vcpu, kvm_run);
3397         enable_cpu_timer_accounting(vcpu);
3398
3399         might_fault();
3400         rc = __vcpu_run(vcpu);
3401
3402         if (signal_pending(current) && !rc) {
3403                 kvm_run->exit_reason = KVM_EXIT_INTR;
3404                 rc = -EINTR;
3405         }
3406
3407         if (guestdbg_exit_pending(vcpu) && !rc)  {
3408                 kvm_s390_prepare_debug_exit(vcpu);
3409                 rc = 0;
3410         }
3411
3412         if (rc == -EREMOTE) {
3413                 /* userspace support is needed, kvm_run has been prepared */
3414                 rc = 0;
3415         }
3416
3417         disable_cpu_timer_accounting(vcpu);
3418         store_regs(vcpu, kvm_run);
3419
3420         if (vcpu->sigset_active)
3421                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
3422
3423         vcpu->stat.exit_userspace++;
3424         return rc;
3425 }
3426
3427 /*
3428  * store status at address
3429  * we use have two special cases:
3430  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3431  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3432  */
3433 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3434 {
3435         unsigned char archmode = 1;
3436         freg_t fprs[NUM_FPRS];
3437         unsigned int px;
3438         u64 clkcomp, cputm;
3439         int rc;
3440
3441         px = kvm_s390_get_prefix(vcpu);
3442         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3443                 if (write_guest_abs(vcpu, 163, &archmode, 1))
3444                         return -EFAULT;
3445                 gpa = 0;
3446         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3447                 if (write_guest_real(vcpu, 163, &archmode, 1))
3448                         return -EFAULT;
3449                 gpa = px;
3450         } else
3451                 gpa -= __LC_FPREGS_SAVE_AREA;
3452
3453         /* manually convert vector registers if necessary */
3454         if (MACHINE_HAS_VX) {
3455                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3456                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3457                                      fprs, 128);
3458         } else {
3459                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3460                                      vcpu->run->s.regs.fprs, 128);
3461         }
3462         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3463                               vcpu->run->s.regs.gprs, 128);
3464         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3465                               &vcpu->arch.sie_block->gpsw, 16);
3466         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3467                               &px, 4);
3468         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3469                               &vcpu->run->s.regs.fpc, 4);
3470         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3471                               &vcpu->arch.sie_block->todpr, 4);
3472         cputm = kvm_s390_get_cpu_timer(vcpu);
3473         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3474                               &cputm, 8);
3475         clkcomp = vcpu->arch.sie_block->ckc >> 8;
3476         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3477                               &clkcomp, 8);
3478         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3479                               &vcpu->run->s.regs.acrs, 64);
3480         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3481                               &vcpu->arch.sie_block->gcr, 128);
3482         return rc ? -EFAULT : 0;
3483 }
3484
3485 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3486 {
3487         /*
3488          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3489          * switch in the run ioctl. Let's update our copies before we save
3490          * it into the save area
3491          */
3492         save_fpu_regs();
3493         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3494         save_access_regs(vcpu->run->s.regs.acrs);
3495
3496         return kvm_s390_store_status_unloaded(vcpu, addr);
3497 }
3498
3499 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3500 {
3501         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3502         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3503 }
3504
3505 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3506 {
3507         unsigned int i;
3508         struct kvm_vcpu *vcpu;
3509
3510         kvm_for_each_vcpu(i, vcpu, kvm) {
3511                 __disable_ibs_on_vcpu(vcpu);
3512         }
3513 }
3514
3515 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3516 {
3517         if (!sclp.has_ibs)
3518                 return;
3519         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3520         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3521 }
3522
3523 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3524 {
3525         int i, online_vcpus, started_vcpus = 0;
3526
3527         if (!is_vcpu_stopped(vcpu))
3528                 return;
3529
3530         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3531         /* Only one cpu at a time may enter/leave the STOPPED state. */
3532         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3533         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3534
3535         for (i = 0; i < online_vcpus; i++) {
3536                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3537                         started_vcpus++;
3538         }
3539
3540         if (started_vcpus == 0) {
3541                 /* we're the only active VCPU -> speed it up */
3542                 __enable_ibs_on_vcpu(vcpu);
3543         } else if (started_vcpus == 1) {
3544                 /*
3545                  * As we are starting a second VCPU, we have to disable
3546                  * the IBS facility on all VCPUs to remove potentially
3547                  * oustanding ENABLE requests.
3548                  */
3549                 __disable_ibs_on_all_vcpus(vcpu->kvm);
3550         }
3551
3552         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3553         /*
3554          * Another VCPU might have used IBS while we were offline.
3555          * Let's play safe and flush the VCPU at startup.
3556          */
3557         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3558         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3559         return;
3560 }
3561
3562 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3563 {
3564         int i, online_vcpus, started_vcpus = 0;
3565         struct kvm_vcpu *started_vcpu = NULL;
3566
3567         if (is_vcpu_stopped(vcpu))
3568                 return;
3569
3570         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3571         /* Only one cpu at a time may enter/leave the STOPPED state. */
3572         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3573         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3574
3575         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3576         kvm_s390_clear_stop_irq(vcpu);
3577
3578         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3579         __disable_ibs_on_vcpu(vcpu);
3580
3581         for (i = 0; i < online_vcpus; i++) {
3582                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3583                         started_vcpus++;
3584                         started_vcpu = vcpu->kvm->vcpus[i];
3585                 }
3586         }
3587
3588         if (started_vcpus == 1) {
3589                 /*
3590                  * As we only have one VCPU left, we want to enable the
3591                  * IBS facility for that VCPU to speed it up.
3592                  */
3593                 __enable_ibs_on_vcpu(started_vcpu);
3594         }
3595
3596         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3597         return;
3598 }
3599
3600 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3601                                      struct kvm_enable_cap *cap)
3602 {
3603         int r;
3604
3605         if (cap->flags)
3606                 return -EINVAL;
3607
3608         switch (cap->cap) {
3609         case KVM_CAP_S390_CSS_SUPPORT:
3610                 if (!vcpu->kvm->arch.css_support) {
3611                         vcpu->kvm->arch.css_support = 1;
3612                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3613                         trace_kvm_s390_enable_css(vcpu->kvm);
3614                 }
3615                 r = 0;
3616                 break;
3617         default:
3618                 r = -EINVAL;
3619                 break;
3620         }
3621         return r;
3622 }
3623
3624 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3625                                   struct kvm_s390_mem_op *mop)
3626 {
3627         void __user *uaddr = (void __user *)mop->buf;
3628         void *tmpbuf = NULL;
3629         int r, srcu_idx;
3630         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3631                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3632
3633         if (mop->flags & ~supported_flags)
3634                 return -EINVAL;
3635
3636         if (mop->size > MEM_OP_MAX_SIZE)
3637                 return -E2BIG;
3638
3639         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3640                 tmpbuf = vmalloc(mop->size);
3641                 if (!tmpbuf)
3642                         return -ENOMEM;
3643         }
3644
3645         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3646
3647         switch (mop->op) {
3648         case KVM_S390_MEMOP_LOGICAL_READ:
3649                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3650                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3651                                             mop->size, GACC_FETCH);
3652                         break;
3653                 }
3654                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3655                 if (r == 0) {
3656                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3657                                 r = -EFAULT;
3658                 }
3659                 break;
3660         case KVM_S390_MEMOP_LOGICAL_WRITE:
3661                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3662                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3663                                             mop->size, GACC_STORE);
3664                         break;
3665                 }
3666                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3667                         r = -EFAULT;
3668                         break;
3669                 }
3670                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3671                 break;
3672         default:
3673                 r = -EINVAL;
3674         }
3675
3676         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3677
3678         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3679                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3680
3681         vfree(tmpbuf);
3682         return r;
3683 }
3684
3685 long kvm_arch_vcpu_ioctl(struct file *filp,
3686                          unsigned int ioctl, unsigned long arg)
3687 {
3688         struct kvm_vcpu *vcpu = filp->private_data;
3689         void __user *argp = (void __user *)arg;
3690         int idx;
3691         long r;
3692
3693         switch (ioctl) {
3694         case KVM_S390_IRQ: {
3695                 struct kvm_s390_irq s390irq;
3696
3697                 r = -EFAULT;
3698                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3699                         break;
3700                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3701                 break;
3702         }
3703         case KVM_S390_INTERRUPT: {
3704                 struct kvm_s390_interrupt s390int;
3705                 struct kvm_s390_irq s390irq;
3706
3707                 r = -EFAULT;
3708                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3709                         break;
3710                 if (s390int_to_s390irq(&s390int, &s390irq))
3711                         return -EINVAL;
3712                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3713                 break;
3714         }
3715         case KVM_S390_STORE_STATUS:
3716                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3717                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3718                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3719                 break;
3720         case KVM_S390_SET_INITIAL_PSW: {
3721                 psw_t psw;
3722
3723                 r = -EFAULT;
3724                 if (copy_from_user(&psw, argp, sizeof(psw)))
3725                         break;
3726                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3727                 break;
3728         }
3729         case KVM_S390_INITIAL_RESET:
3730                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3731                 break;
3732         case KVM_SET_ONE_REG:
3733         case KVM_GET_ONE_REG: {
3734                 struct kvm_one_reg reg;
3735                 r = -EFAULT;
3736                 if (copy_from_user(&reg, argp, sizeof(reg)))
3737                         break;
3738                 if (ioctl == KVM_SET_ONE_REG)
3739                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3740                 else
3741                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3742                 break;
3743         }
3744 #ifdef CONFIG_KVM_S390_UCONTROL
3745         case KVM_S390_UCAS_MAP: {
3746                 struct kvm_s390_ucas_mapping ucasmap;
3747
3748                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3749                         r = -EFAULT;
3750                         break;
3751                 }
3752
3753                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3754                         r = -EINVAL;
3755                         break;
3756                 }
3757
3758                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3759                                      ucasmap.vcpu_addr, ucasmap.length);
3760                 break;
3761         }
3762         case KVM_S390_UCAS_UNMAP: {
3763                 struct kvm_s390_ucas_mapping ucasmap;
3764
3765                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3766                         r = -EFAULT;
3767                         break;
3768                 }
3769
3770                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3771                         r = -EINVAL;
3772                         break;
3773                 }
3774
3775                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3776                         ucasmap.length);
3777                 break;
3778         }
3779 #endif
3780         case KVM_S390_VCPU_FAULT: {
3781                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3782                 break;
3783         }
3784         case KVM_ENABLE_CAP:
3785         {
3786                 struct kvm_enable_cap cap;
3787                 r = -EFAULT;
3788                 if (copy_from_user(&cap, argp, sizeof(cap)))
3789                         break;
3790                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3791                 break;
3792         }
3793         case KVM_S390_MEM_OP: {
3794                 struct kvm_s390_mem_op mem_op;
3795
3796                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3797                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3798                 else
3799                         r = -EFAULT;
3800                 break;
3801         }
3802         case KVM_S390_SET_IRQ_STATE: {
3803                 struct kvm_s390_irq_state irq_state;
3804
3805                 r = -EFAULT;
3806                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3807                         break;
3808                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3809                     irq_state.len == 0 ||
3810                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3811                         r = -EINVAL;
3812                         break;
3813                 }
3814                 r = kvm_s390_set_irq_state(vcpu,
3815                                            (void __user *) irq_state.buf,
3816                                            irq_state.len);
3817                 break;
3818         }
3819         case KVM_S390_GET_IRQ_STATE: {
3820                 struct kvm_s390_irq_state irq_state;
3821
3822                 r = -EFAULT;
3823                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3824                         break;
3825                 if (irq_state.len == 0) {
3826                         r = -EINVAL;
3827                         break;
3828                 }
3829                 r = kvm_s390_get_irq_state(vcpu,
3830                                            (__u8 __user *)  irq_state.buf,
3831                                            irq_state.len);
3832                 break;
3833         }
3834         default:
3835                 r = -ENOTTY;
3836         }
3837         return r;
3838 }
3839
3840 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3841 {
3842 #ifdef CONFIG_KVM_S390_UCONTROL
3843         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3844                  && (kvm_is_ucontrol(vcpu->kvm))) {
3845                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3846                 get_page(vmf->page);
3847                 return 0;
3848         }
3849 #endif
3850         return VM_FAULT_SIGBUS;
3851 }
3852
/* Memslot creation hook: nothing to set up here, always returns 0. */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
3858
3859 /* Section: memory related */
3860 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3861                                    struct kvm_memory_slot *memslot,
3862                                    const struct kvm_userspace_memory_region *mem,
3863                                    enum kvm_mr_change change)
3864 {
3865         /* A few sanity checks. We can have memory slots which have to be
3866            located/ended at a segment boundary (1MB). The memory in userland is
3867            ok to be fragmented into various different vmas. It is okay to mmap()
3868            and munmap() stuff in this slot after doing this call at any time */
3869
3870         if (mem->userspace_addr & 0xffffful)
3871                 return -EINVAL;
3872
3873         if (mem->memory_size & 0xffffful)
3874                 return -EINVAL;
3875
3876         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3877                 return -EINVAL;
3878
3879         return 0;
3880 }
3881
3882 void kvm_arch_commit_memory_region(struct kvm *kvm,
3883                                 const struct kvm_userspace_memory_region *mem,
3884                                 const struct kvm_memory_slot *old,
3885                                 const struct kvm_memory_slot *new,
3886                                 enum kvm_mr_change change)
3887 {
3888         int rc;
3889
3890         /* If the basics of the memslot do not change, we do not want
3891          * to update the gmap. Every update causes several unnecessary
3892          * segment translation exceptions. This is usually handled just
3893          * fine by the normal fault handler + gmap, but it will also
3894          * cause faults on the prefix page of running guest CPUs.
3895          */
3896         if (old->userspace_addr == mem->userspace_addr &&
3897             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3898             old->npages * PAGE_SIZE == mem->memory_size)
3899                 return;
3900
3901         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3902                 mem->guest_phys_addr, mem->memory_size);
3903         if (rc)
3904                 pr_warn("failed to commit memory region\n");
3905         return;
3906 }
3907
3908 static inline unsigned long nonhyp_mask(int i)
3909 {
3910         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3911
3912         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3913 }
3914
3915 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3916 {
3917         vcpu->valid_wakeup = false;
3918 }
3919
3920 static int __init kvm_s390_init(void)
3921 {
3922         int i;
3923
3924         if (!sclp.has_sief2) {
3925                 pr_info("SIE not available\n");
3926                 return -ENODEV;
3927         }
3928
3929         for (i = 0; i < 16; i++)
3930                 kvm_s390_fac_list_mask[i] |=
3931                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3932
3933         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3934 }
3935
3936 static void __exit kvm_s390_exit(void)
3937 {
3938         kvm_exit();
3939 }
3940
3941 module_init(kvm_s390_init);
3942 module_exit(kvm_s390_exit);
3943
3944 /*
3945  * Enable autoloading of the kvm module.
3946  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3947  * since x86 takes a different approach.
3948  */
3949 #include <linux/miscdevice.h>
3950 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3951 MODULE_ALIAS("devname:kvm");