/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_pei", VCPU_STAT(exit_pei) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
        { "instruction_stsch", VCPU_STAT(instruction_stsch) },
        { "instruction_chsc", VCPU_STAT(instruction_chsc) },
        { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
        { "instruction_sie", VCPU_STAT(instruction_sie) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
        { "diagnose_10", VCPU_STAT(diagnose_10) },
        { "diagnose_44", VCPU_STAT(diagnose_44) },
        { "diagnose_9c", VCPU_STAT(diagnose_9c) },
        { "diagnose_258", VCPU_STAT(diagnose_258) },
        { "diagnose_308", VCPU_STAT(diagnose_308) },
        { "diagnose_500", VCPU_STAT(diagnose_500) },
        { NULL }
};

struct kvm_s390_tod_clock_ext {
        __u8 epoch_idx;
        __u64 tod;
        __u8 reserved[7];
} __packed;
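
/*
 * A minimal layout sketch (illustrative only, not part of this file):
 * with __packed, the structure above occupies exactly 16 bytes (1-byte
 * epoch index, 8-byte TOD, 7 reserved bytes), matching the 16-byte value
 * that get_tod_clock_ext() stores into it further down in this file.
 */
#if 0	/* illustrative sketch */
static inline void tod_clock_ext_layout_check(void)
{
        BUILD_BUG_ON(sizeof(struct kvm_s390_tod_clock_ext) != 16);
        BUILD_BUG_ON(offsetof(struct kvm_s390_tod_clock_ext, tod) != 1);
}
#endif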

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
        BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
        return ARRAY_SIZE(kvm_s390_fac_list_mask);
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
        /* every s390 is virtualization enabled ;-) */
        return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
                          void *v)
{
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        int i;
        unsigned long long *delta = v;

        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm->arch.epoch -= *delta;
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        vcpu->arch.sie_block->epoch -= *delta;
                        if (vcpu->arch.cputm_enabled)
                                vcpu->arch.cputm_start += *delta;
                        if (vcpu->arch.vsie_block)
                                vcpu->arch.vsie_block->epoch -= *delta;
                }
        }
        return NOTIFY_OK;
}
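
/*
 * A worked example of the invariant kvm_clock_sync() maintains, assuming
 * the usual relation guest_tod = host_tod + epoch: when STP steering
 * steps the host TOD by *delta, subtracting *delta from every epoch
 * leaves the guest view unchanged, since
 *
 *   (host_tod + delta) + (epoch - delta) == host_tod + epoch.
 *
 * cputm_start moves in the opposite direction because it is a saved host
 * TOD timestamp; advancing it by *delta keeps the accounted difference
 * "now - cputm_start" the same across the step.
 */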

static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
        gmap_notifier.notifier_call = kvm_gmap_notifier;
        gmap_register_pte_notifier(&gmap_notifier);
        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
        gmap_register_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
                                       &kvm_clock_notifier);
        return 0;
}

void kvm_arch_hardware_unsetup(void)
{
        gmap_unregister_pte_notifier(&gmap_notifier);
        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
                                         &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
        set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
        int cc;

        asm volatile(
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : "d" (r0)
                : "cc");
        return cc == 0;
}

static void kvm_s390_cpu_feat_init(void)
{
        int i;

        for (i = 0; i < 256; ++i) {
                if (plo_test_bit(i))
                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
        }

        if (test_facility(28)) /* TOD-clock steering */
                ptff(kvm_s390_available_subfunc.ptff,
                     sizeof(kvm_s390_available_subfunc.ptff),
                     PTFF_QAF);

        if (test_facility(17)) { /* MSA */
                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmac);
                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmc);
                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.km);
                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kimd);
                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.klmd);
        }
        if (test_facility(76)) /* MSA3 */
                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pckmo);
        if (test_facility(77)) { /* MSA4 */
                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmctr);
                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmf);
                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmo);
                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pcc);
        }
        if (test_facility(57)) /* MSA5 */
                __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.ppno);

        if (test_facility(146)) /* MSA8 */
                __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kma);

        if (MACHINE_HAS_ESOP)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
        /*
         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
         */
        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
            !test_facility(3) || !nested)
                return;
        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
        if (sclp.has_64bscao)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
        if (sclp.has_siif)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
        if (sclp.has_gpere)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
        if (sclp.has_gsls)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
        if (sclp.has_ib)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
        if (sclp.has_cei)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
        if (sclp.has_ibs)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
        if (sclp.has_kss)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
        /*
         * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
         * all skey handling functions read/set the skey from the PGSTE
         * instead of the real storage key.
         *
         * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
         * pages be detected as preserved although they are resident.
         *
         * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
         *
         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
         *
         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
         * cannot easily shadow the SCA because of the ipte lock.
         */
}
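
/*
 * A short sketch of the PLO bit packing used by the query loop above,
 * assuming the same plo[] byte array: function code i is recorded in
 * byte i >> 3 at the big-endian bit position 0x80 >> (i & 7), so e.g.
 * i = 10 sets bit 0x20 in plo[1]. The helper below is illustrative only.
 */
#if 0	/* illustrative sketch */
static inline int plo_subfunc_present(const __u8 *plo, unsigned char i)
{
        return (plo[i >> 3] & (0x80 >> (i & 7))) != 0;
}
#endif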

int kvm_arch_init(void *opaque)
{
        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf)
                return -ENOMEM;

        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
                debug_unregister(kvm_s390_dbf);
                return -ENOMEM;
        }

        kvm_s390_cpu_feat_init();

        /* Register floating interrupt controller interface. */
        return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
        debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
{
        if (ioctl == KVM_S390_ENABLE_SIE)
                return s390_enable_sie();
        return -EINVAL;
}
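
/*
 * A minimal userspace sketch (fd name is an assumption) of the only
 * device ioctl handled above: KVM_S390_ENABLE_SIE is issued on the
 * /dev/kvm fd and takes no argument.
 */
#if 0	/* illustrative userspace sketch */
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int enable_sie_example(void)
{
        int kvm_fd = open("/dev/kvm", O_RDWR);

        if (kvm_fd < 0)
                return -1;
        /* 0 on success, otherwise the error from s390_enable_sie() */
        return ioctl(kvm_fd, KVM_S390_ENABLE_SIE, 0);
}
#endif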

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
        int r;

        switch (ext) {
        case KVM_CAP_S390_PSW:
        case KVM_CAP_S390_GMAP:
        case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_CAP_S390_UCONTROL:
#endif
        case KVM_CAP_ASYNC_PF:
        case KVM_CAP_SYNC_REGS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_ENABLE_CAP_VM:
        case KVM_CAP_S390_IRQCHIP:
        case KVM_CAP_VM_ATTRIBUTES:
        case KVM_CAP_MP_STATE:
        case KVM_CAP_IMMEDIATE_EXIT:
        case KVM_CAP_S390_INJECT_IRQ:
        case KVM_CAP_S390_USER_SIGP:
        case KVM_CAP_S390_USER_STSI:
        case KVM_CAP_S390_SKEYS:
        case KVM_CAP_S390_IRQ_STATE:
        case KVM_CAP_S390_USER_INSTR0:
        case KVM_CAP_S390_CMMA_MIGRATION:
        case KVM_CAP_S390_AIS:
                r = 1;
                break;
        case KVM_CAP_S390_MEM_OP:
                r = MEM_OP_MAX_SIZE;
                break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
                r = KVM_S390_BSCA_CPU_SLOTS;
                if (!kvm_s390_use_sca_entries())
                        r = KVM_MAX_VCPUS;
                else if (sclp.has_esca && sclp.has_64bscao)
                        r = KVM_S390_ESCA_CPU_SLOTS;
                break;
        case KVM_CAP_NR_MEMSLOTS:
                r = KVM_USER_MEM_SLOTS;
                break;
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                r = MACHINE_HAS_VX;
                break;
        case KVM_CAP_S390_RI:
                r = test_facility(64);
                break;
        case KVM_CAP_S390_GS:
                r = test_facility(133);
                break;
        default:
                r = 0;
        }
        return r;
}
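
/*
 * A minimal userspace sketch of querying one of the extensions reported
 * above via KVM_CHECK_EXTENSION. Note that some capabilities return a
 * value rather than a plain 0/1; for KVM_CAP_S390_MEM_OP the result is
 * MEM_OP_MAX_SIZE.
 */
#if 0	/* illustrative userspace sketch */
#include <sys/ioctl.h>
#include <linux/kvm.h>

int max_mem_op_size(int kvm_fd)
{
        /* <= 0 means the capability is absent or the ioctl failed */
        return ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
}
#endif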

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
                                        struct kvm_memory_slot *memslot)
{
        gfn_t cur_gfn, last_gfn;
        unsigned long address;
        struct gmap *gmap = kvm->arch.gmap;

        /* Loop over all guest pages */
        last_gfn = memslot->base_gfn + memslot->npages;
        for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
                address = gfn_to_hva_memslot(memslot, cur_gfn);

                if (test_and_clear_guest_dirty(gmap->mm, address))
                        mark_page_dirty(kvm, cur_gfn);
                if (fatal_signal_pending(current))
                        return;
                cond_resched();
        }
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
{
        int r;
        unsigned long n;
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
        int is_dirty = 0;

        if (kvm_is_ucontrol(kvm))
                return -EINVAL;

        mutex_lock(&kvm->slots_lock);

        r = -EINVAL;
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;

        slots = kvm_memslots(kvm);
        memslot = id_to_memslot(slots, log->slot);
        r = -ENOENT;
        if (!memslot->dirty_bitmap)
                goto out;

        kvm_s390_sync_dirty_log(kvm, memslot);
        r = kvm_get_dirty_log(kvm, log, &is_dirty);
        if (r)
                goto out;

        /* Clear the dirty log */
        if (is_dirty) {
                n = kvm_dirty_bitmap_bytes(memslot);
                memset(memslot->dirty_bitmap, 0, n);
        }
        r = 0;
out:
        mutex_unlock(&kvm->slots_lock);
        return r;
}
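
/*
 * A minimal userspace sketch of the ioctl served above: userspace passes
 * a bitmap with one bit per page of the slot in struct kvm_dirty_log and
 * the kernel syncs, copies out, and clears the per-slot dirty state.
 * The helper name and rounding are assumptions for illustration.
 */
#if 0	/* illustrative userspace sketch */
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int fetch_dirty_log(int vm_fd, __u32 slot, __u64 npages)
{
        struct kvm_dirty_log log;
        int ret;

        memset(&log, 0, sizeof(log));
        log.slot = slot;
        /* one bit per page, rounded up to whole 64-bit words */
        log.dirty_bitmap = calloc((npages + 63) / 64, 8);
        if (!log.dirty_bitmap)
                return -1;
        ret = ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
        free(log.dirty_bitmap);
        return ret;
}
#endif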

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
        unsigned int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
        }
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_IRQCHIP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
                kvm->arch.use_irqchip = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_SIGP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
                kvm->arch.user_sigp = 1;
                r = 0;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (MACHINE_HAS_VX) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
                        set_kvm_facility(kvm->arch.model.fac_list, 129);
                        if (test_facility(134)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 134);
                                set_kvm_facility(kvm->arch.model.fac_list, 134);
                        }
                        if (test_facility(135)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 135);
                                set_kvm_facility(kvm->arch.model.fac_list, 135);
                        }
                        r = 0;
                } else
                        r = -EINVAL;
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_RI:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(64)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
                        set_kvm_facility(kvm->arch.model.fac_list, 64);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_AIS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else {
                        set_kvm_facility(kvm->arch.model.fac_mask, 72);
                        set_kvm_facility(kvm->arch.model.fac_list, 72);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: AIS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_GS:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (atomic_read(&kvm->online_vcpus)) {
                        r = -EBUSY;
                } else if (test_facility(133)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 133);
                        set_kvm_facility(kvm->arch.model.fac_list, 133);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_USER_STSI:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
                kvm->arch.user_stsi = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_INSTR0:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
                kvm->arch.user_instr0 = 1;
                icpt_operexc_on_all_vcpus(kvm);
                r = 0;
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}
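
/*
 * A minimal userspace sketch of enabling one of the VM capabilities
 * handled above. KVM_CAP_S390_USER_SIGP takes no arguments, and flags
 * must be zero or the handler returns -EINVAL.
 */
#if 0	/* illustrative userspace sketch */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int enable_user_sigp(int vm_fd)
{
        struct kvm_enable_cap cap;

        memset(&cap, 0, sizeof(cap));
        cap.cap = KVM_CAP_S390_USER_SIGP;
        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}
#endif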

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_LIMIT_SIZE:
                ret = 0;
                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
                         kvm->arch.mem_limit);
                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
                        ret = -EFAULT;
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;
        unsigned int idx;
        switch (attr->attr) {
        case KVM_S390_VM_MEM_ENABLE_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;

                ret = -EBUSY;
                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
                mutex_lock(&kvm->lock);
                if (!kvm->created_vcpus) {
                        kvm->arch.use_cmma = 1;
                        ret = 0;
                }
                mutex_unlock(&kvm->lock);
                break;
        case KVM_S390_VM_MEM_CLR_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;
                ret = -EINVAL;
                if (!kvm->arch.use_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
                mutex_lock(&kvm->lock);
                idx = srcu_read_lock(&kvm->srcu);
                s390_reset_cmma(kvm->arch.gmap->mm);
                srcu_read_unlock(&kvm->srcu, idx);
                mutex_unlock(&kvm->lock);
                ret = 0;
                break;
        case KVM_S390_VM_MEM_LIMIT_SIZE: {
                unsigned long new_limit;

                if (kvm_is_ucontrol(kvm))
                        return -EINVAL;

                if (get_user(new_limit, (u64 __user *)attr->addr))
                        return -EFAULT;

                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
                    new_limit > kvm->arch.mem_limit)
                        return -E2BIG;

                if (!new_limit)
                        return -EINVAL;

                /* gmap_create takes last usable address */
                if (new_limit != KVM_S390_NO_MEM_LIMIT)
                        new_limit -= 1;

                ret = -EBUSY;
                mutex_lock(&kvm->lock);
                if (!kvm->created_vcpus) {
                        /* gmap_create will round the limit up */
                        struct gmap *new = gmap_create(current->mm, new_limit);

                        if (!new) {
                                ret = -ENOMEM;
                        } else {
                                gmap_remove(kvm->arch.gmap);
                                new->private = kvm;
                                kvm->arch.gmap = new;
                                ret = 0;
                        }
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
                         (void *) kvm->arch.gmap->asce);
                break;
        }
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}
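
/*
 * A minimal userspace sketch of reaching the handler above through the
 * KVM_SET_DEVICE_ATTR vm ioctl, here setting the memory limit; the
 * 4 GiB value is an arbitrary example.
 */
#if 0	/* illustrative userspace sketch */
#include <sys/ioctl.h>
#include <linux/kvm.h>

int set_mem_limit_4g(int vm_fd)
{
        __u64 limit = 4ULL << 30;	/* example: 4 GiB */
        struct kvm_device_attr attr = {
                .group = KVM_S390_VM_MEM_CTRL,
                .attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
                .addr  = (__u64)(unsigned long)&limit,
        };

        /* must run before the first vcpu is created, else -EBUSY */
        return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
}
#endif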

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_vcpu *vcpu;
        int i;

        if (!test_kvm_facility(kvm, 76))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        switch (attr->attr) {
        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                get_random_bytes(
                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                kvm->arch.crypto.aes_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                get_random_bytes(
                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                kvm->arch.crypto.dea_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                kvm->arch.crypto.aes_kw = 0;
                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                kvm->arch.crypto.dea_kw = 0;
                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
                break;
        default:
                mutex_unlock(&kvm->lock);
                return -ENXIO;
        }

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_vcpu_crypto_setup(vcpu);
                exit_sie(vcpu);
        }
        mutex_unlock(&kvm->lock);
        return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
        int cx;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(cx, vcpu, kvm)
                kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
        struct kvm_s390_migration_state *mgs;
        struct kvm_memory_slot *ms;
        /* should be the only one */
        struct kvm_memslots *slots;
        unsigned long ram_pages;
        int slotnr;

        /* migration mode already enabled */
        if (kvm->arch.migration_state)
                return 0;

        slots = kvm_memslots(kvm);
        if (!slots || !slots->used_slots)
                return -EINVAL;

        mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
        if (!mgs)
                return -ENOMEM;
        kvm->arch.migration_state = mgs;

        if (kvm->arch.use_cmma) {
                /*
                 * Get the last slot. They should be sorted by base_gfn, so the
                 * last slot is also the one at the end of the address space.
                 * We have verified above that at least one slot is present.
                 */
                ms = slots->memslots + slots->used_slots - 1;
                /* round up so we only use full longs */
                ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
                /* allocate enough bytes to store all the bits */
                mgs->pgste_bitmap = vmalloc(ram_pages / 8);
                if (!mgs->pgste_bitmap) {
                        kfree(mgs);
                        kvm->arch.migration_state = NULL;
                        return -ENOMEM;
                }

                mgs->bitmap_size = ram_pages;
                atomic64_set(&mgs->dirty_pages, ram_pages);
                /* mark all the pages in active slots as dirty */
                for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
                        ms = slots->memslots + slotnr;
                        bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
                }

                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
        }
        return 0;
}
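
/*
 * A worked example of the bitmap sizing above, assuming one 1 GiB slot:
 * base_gfn + npages = 262144 pages, already a multiple of BITS_PER_LONG
 * (64 here), so ram_pages = 262144 and the vmalloc'ed bitmap takes
 * 262144 / 8 = 32768 bytes, i.e. one bit of dirty state per 4 KiB page.
 */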

/*
 * Must be called with kvm->lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
        struct kvm_s390_migration_state *mgs;

        /* migration mode already disabled */
        if (!kvm->arch.migration_state)
                return 0;
        mgs = kvm->arch.migration_state;
        kvm->arch.migration_state = NULL;

        if (kvm->arch.use_cmma) {
                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
                vfree(mgs->pgste_bitmap);
        }
        kfree(mgs);
        return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        int idx, res = -ENXIO;

        mutex_lock(&kvm->lock);
        switch (attr->attr) {
        case KVM_S390_VM_MIGRATION_START:
                idx = srcu_read_lock(&kvm->srcu);
                res = kvm_s390_vm_start_migration(kvm);
                srcu_read_unlock(&kvm->srcu, idx);
                break;
        case KVM_S390_VM_MIGRATION_STOP:
                res = kvm_s390_vm_stop_migration(kvm);
                break;
        default:
                break;
        }
        mutex_unlock(&kvm->lock);

        return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        u64 mig = (kvm->arch.migration_state != NULL);

        if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
                return -ENXIO;

        if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;

        if (test_kvm_facility(kvm, 139))
                kvm_s390_set_tod_clock_ext(kvm, &gtod);
        else if (gtod.epoch_idx == 0)
                kvm_s390_set_tod_clock(kvm, gtod.tod);
        else
                return -EINVAL;

        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);

        return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high;

        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
                                           sizeof(gtod_high)))
                return -EFAULT;

        if (gtod_high != 0)
                return -EINVAL;
        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;

        kvm_s390_set_tod_clock(kvm, gtod);
        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
        return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_set_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_set_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_set_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
                                        struct kvm_s390_vm_tod_clock *gtod)
{
        struct kvm_s390_tod_clock_ext htod;

        preempt_disable();

        get_tod_clock_ext((char *)&htod);

        gtod->tod = htod.tod + kvm->arch.epoch;
        gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;

        if (gtod->tod < htod.tod)
                gtod->epoch_idx += 1;

        preempt_enable();
}
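
/*
 * A worked example of the carry handling above: the epoch is added to
 * the TOD modulo 2^64, so if the low 64-bit sum wraps (gtod->tod ends up
 * below htod.tod), the lost 2^64 is restored as +1 in the epoch index,
 * like a carry in multiword addition. E.g. htod.tod = 0xfffffffffffffff0
 * plus an epoch of 0x20 yields tod = 0x10 and epoch_idx incremented.
 */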

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        memset(&gtod, 0, sizeof(gtod));

        if (test_kvm_facility(kvm, 139))
                kvm_s390_get_tod_clock_ext(kvm, &gtod);
        else
                gtod.tod = kvm_s390_get_tod_clock_fast(kvm);

        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;

        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);
        return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high = 0;

        if (copy_to_user((void __user *)attr->addr, &gtod_high,
                                         sizeof(gtod_high)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        gtod = kvm_s390_get_tod_clock_fast(kvm);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

        return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_get_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_get_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_get_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        u16 lowest_ibc, unblocked_ibc;
        int ret = 0;

        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus) {
                ret = -EBUSY;
                goto out;
        }
        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        if (!copy_from_user(proc, (void __user *)attr->addr,
                            sizeof(*proc))) {
                kvm->arch.model.cpuid = proc->cpuid;
                lowest_ibc = sclp.ibc >> 16 & 0xfff;
                unblocked_ibc = sclp.ibc & 0xfff;
                if (lowest_ibc && proc->ibc) {
                        if (proc->ibc > unblocked_ibc)
                                kvm->arch.model.ibc = unblocked_ibc;
                        else if (proc->ibc < lowest_ibc)
                                kvm->arch.model.ibc = lowest_ibc;
                        else
                                kvm->arch.model.ibc = proc->ibc;
                }
                memcpy(kvm->arch.model.fac_list, proc->fac_list,
                       S390_ARCH_FAC_LIST_SIZE_BYTE);
                VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
                         kvm->arch.model.ibc,
                         kvm->arch.model.cpuid);
                VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
                         kvm->arch.model.fac_list[0],
                         kvm->arch.model.fac_list[1],
                         kvm->arch.model.fac_list[2]);
        } else
                ret = -EFAULT;
        kfree(proc);
out:
        mutex_unlock(&kvm->lock);
        return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
                                       struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;
        int ret = -EBUSY;

        if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
                return -EFAULT;
        if (!bitmap_subset((unsigned long *) data.feat,
                           kvm_s390_available_cpu_feat,
                           KVM_S390_VM_CPU_FEAT_NR_BITS))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        if (!atomic_read(&kvm->online_vcpus)) {
                bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
                            KVM_S390_VM_CPU_FEAT_NR_BITS);
                ret = 0;
        }
        mutex_unlock(&kvm->lock);
        return ret;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        /*
         * Once supported by kernel + hw, we have to store the subfunctions
         * in kvm->arch and remember that user space configured them.
         */
        return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret = -ENXIO;

        switch (attr->attr) {
        case KVM_S390_VM_CPU_PROCESSOR:
                ret = kvm_s390_set_processor(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                ret = kvm_s390_set_processor_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                ret = kvm_s390_set_processor_subfunc(kvm, attr);
                break;
        }
        return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        int ret = 0;

        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        proc->cpuid = kvm->arch.model.cpuid;
        proc->ibc = kvm->arch.model.ibc;
        memcpy(&proc->fac_list, kvm->arch.model.fac_list,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
                 kvm->arch.model.ibc,
                 kvm->arch.model.cpuid);
        VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
                 kvm->arch.model.fac_list[0],
                 kvm->arch.model.fac_list[1],
                 kvm->arch.model.fac_list[2]);
        if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
                ret = -EFAULT;
        kfree(proc);
out:
        return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_machine *mach;
        int ret = 0;

        mach = kzalloc(sizeof(*mach), GFP_KERNEL);
        if (!mach) {
                ret = -ENOMEM;
                goto out;
        }
        get_cpu_id((struct cpuid *) &mach->cpuid);
        mach->ibc = sclp.ibc;
        memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
               sizeof(S390_lowcore.stfle_fac_list));
        VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
                 kvm->arch.model.ibc,
                 kvm->arch.model.cpuid);
        VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
                 mach->fac_mask[0],
                 mach->fac_mask[1],
                 mach->fac_mask[2]);
        VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
                 mach->fac_list[0],
                 mach->fac_list[1],
                 mach->fac_list[2]);
        if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
                ret = -EFAULT;
        kfree(mach);
out:
        return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
                                       struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;

        bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
                    KVM_S390_VM_CPU_FEAT_NR_BITS);
        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;

        bitmap_copy((unsigned long *) data.feat,
                    kvm_s390_available_cpu_feat,
                    KVM_S390_VM_CPU_FEAT_NR_BITS);
        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        /*
         * Once we can actually configure subfunctions (kernel + hw support),
         * we have to check if they were already set by user space, if so copy
         * them from kvm->arch.
         */
        return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
                                        struct kvm_device_attr *attr)
{
        if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
            sizeof(struct kvm_s390_vm_cpu_subfunc)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret = -ENXIO;

        switch (attr->attr) {
        case KVM_S390_VM_CPU_PROCESSOR:
                ret = kvm_s390_get_processor(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE:
                ret = kvm_s390_get_machine(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                ret = kvm_s390_get_processor_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE_FEAT:
                ret = kvm_s390_get_machine_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                ret = kvm_s390_get_processor_subfunc(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
                ret = kvm_s390_get_machine_subfunc(kvm, attr);
                break;
        }
        return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                ret = kvm_s390_set_mem_control(kvm, attr);
                break;
        case KVM_S390_VM_TOD:
                ret = kvm_s390_set_tod(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MODEL:
                ret = kvm_s390_set_cpu_model(kvm, attr);
                break;
        case KVM_S390_VM_CRYPTO:
                ret = kvm_s390_vm_set_crypto(kvm, attr);
                break;
        case KVM_S390_VM_MIGRATION:
                ret = kvm_s390_vm_set_migration(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                ret = kvm_s390_get_mem_control(kvm, attr);
                break;
        case KVM_S390_VM_TOD:
                ret = kvm_s390_get_tod(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MODEL:
                ret = kvm_s390_get_cpu_model(kvm, attr);
                break;
        case KVM_S390_VM_MIGRATION:
                ret = kvm_s390_vm_get_migration(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                switch (attr->attr) {
                case KVM_S390_VM_MEM_ENABLE_CMMA:
                case KVM_S390_VM_MEM_CLR_CMMA:
                        ret = sclp.has_cmma ? 0 : -ENXIO;
                        break;
                case KVM_S390_VM_MEM_LIMIT_SIZE:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_TOD:
                switch (attr->attr) {
                case KVM_S390_VM_TOD_LOW:
                case KVM_S390_VM_TOD_HIGH:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_CPU_MODEL:
                switch (attr->attr) {
                case KVM_S390_VM_CPU_PROCESSOR:
                case KVM_S390_VM_CPU_MACHINE:
                case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                case KVM_S390_VM_CPU_MACHINE_FEAT:
                case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
                        ret = 0;
                        break;
                /* configuring subfunctions is not supported yet */
                case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_CRYPTO:
                switch (attr->attr) {
                case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_MIGRATION:
                ret = 0;
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
        uint8_t *keys;
        uint64_t hva;
        int srcu_idx, i, r = 0;

        if (args->flags != 0)
                return -EINVAL;

        /* Is this guest using storage keys? */
        if (!mm_use_skey(current->mm))
                return KVM_S390_GET_SKEYS_NONE;

        /* Enforce sane limit on memory allocation */
        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
                return -EINVAL;

        keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
        if (!keys)
                return -ENOMEM;

        down_read(&current->mm->mmap_sem);
        srcu_idx = srcu_read_lock(&kvm->srcu);
        for (i = 0; i < args->count; i++) {
                hva = gfn_to_hva(kvm, args->start_gfn + i);
                if (kvm_is_error_hva(hva)) {
                        r = -EFAULT;
                        break;
                }

                r = get_guest_storage_key(current->mm, hva, &keys[i]);
                if (r)
                        break;
        }
        srcu_read_unlock(&kvm->srcu, srcu_idx);
        up_read(&current->mm->mmap_sem);

        if (!r) {
                r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
                                 sizeof(uint8_t) * args->count);
                if (r)
                        r = -EFAULT;
        }

        kvfree(keys);
        return r;
}
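
/*
 * A minimal userspace sketch of the read path above, using the
 * KVM_S390_GET_SKEYS vm ioctl; buffer handling and names are assumptions.
 * A return value of KVM_S390_GET_SKEYS_NONE means the guest has not
 * enabled storage keys at all.
 */
#if 0	/* illustrative userspace sketch */
#include <sys/ioctl.h>
#include <linux/kvm.h>

int read_skeys(int vm_fd, __u64 start_gfn, __u8 *buf, __u64 count)
{
        struct kvm_s390_skeys skeys = {
                .start_gfn     = start_gfn,
                .count         = count,	/* 1..KVM_S390_SKEYS_MAX */
                .skeydata_addr = (__u64)(unsigned long)buf,
                .flags         = 0,	/* must be zero */
        };

        return ioctl(vm_fd, KVM_S390_GET_SKEYS, &skeys);
}
#endif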
1439
1440 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1441 {
1442         uint8_t *keys;
1443         uint64_t hva;
1444         int srcu_idx, i, r = 0;
1445
1446         if (args->flags != 0)
1447                 return -EINVAL;
1448
1449         /* Enforce sane limit on memory allocation */
1450         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1451                 return -EINVAL;
1452
1453         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1454         if (!keys)
1455                 return -ENOMEM;
1456
1457         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1458                            sizeof(uint8_t) * args->count);
1459         if (r) {
1460                 r = -EFAULT;
1461                 goto out;
1462         }
1463
1464         /* Enable storage key handling for the guest */
1465         r = s390_enable_skey();
1466         if (r)
1467                 goto out;
1468
1469         down_read(&current->mm->mmap_sem);
1470         srcu_idx = srcu_read_lock(&kvm->srcu);
1471         for (i = 0; i < args->count; i++) {
1472                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1473                 if (kvm_is_error_hva(hva)) {
1474                         r = -EFAULT;
1475                         break;
1476                 }
1477
1478                 /* Lowest order bit is reserved */
1479                 if (keys[i] & 0x01) {
1480                         r = -EINVAL;
1481                         break;
1482                 }
1483
1484                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1485                 if (r)
1486                         break;
1487         }
1488         srcu_read_unlock(&kvm->srcu, srcu_idx);
1489         up_read(&current->mm->mmap_sem);
1490 out:
1491         kvfree(keys);
1492         return r;
1493 }
1494
1495 /*
1496  * Base address and length must be sent at the start of each block; it is
1497  * therefore cheaper to also send a short run of clean data, as long as that
1498  * run is smaller than the size of two longs.
1499  */
1500 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
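/*
 * Worked example (assuming a 64-bit host, so sizeof(void *) == 8): starting
 * a new block costs 2 * 8 = 16 bytes for base address and length, so padding
 * a gap of up to 16 clean result bytes is never more expensive than opening
 * a new block.
 */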
1501 /* same limit as for storage keys, for consistency */
1502 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1503
1504 /*
1505  * This function searches for the next page with dirty CMMA attributes, and
1506  * saves the attributes in the buffer up to either the end of the buffer or
1507  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1508  * no trailing clean bytes are saved.
1509  * If no dirty bits were found, or if CMMA was not enabled or used, the
1510  * output buffer will report a length of 0.
1511  */
1512 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1513                                   struct kvm_s390_cmma_log *args)
1514 {
1515         struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1516         unsigned long bufsize, hva, pgstev, i, next, cur;
1517         int srcu_idx, peek, r = 0, rr;
1518         u8 *res;
1519
1520         cur = args->start_gfn;
1521         i = next = pgstev = 0;
1522
1523         if (unlikely(!kvm->arch.use_cmma))
1524                 return -ENXIO;
1525         /* Invalid/unsupported flags were specified */
1526         if (args->flags & ~KVM_S390_CMMA_PEEK)
1527                 return -EINVAL;
1528         /* Without KVM_S390_CMMA_PEEK, reading the log requires migration mode */
1529         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1530         if (!peek && !s)
1531                 return -EINVAL;
1532         /* CMMA is disabled or was not used, or the buffer has length zero */
1533         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1534         if (!bufsize || !kvm->mm->context.use_cmma) {
1535                 memset(args, 0, sizeof(*args));
1536                 return 0;
1537         }
1538
1539         if (!peek) {
1540                 /* We are not peeking, and there are no dirty pages */
1541                 if (!atomic64_read(&s->dirty_pages)) {
1542                         memset(args, 0, sizeof(*args));
1543                         return 0;
1544                 }
1545                 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1546                                     args->start_gfn);
1547                 if (cur >= s->bitmap_size)      /* nothing found, loop back */
1548                         cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1549                 if (cur >= s->bitmap_size) {    /* again! (very unlikely) */
1550                         memset(args, 0, sizeof(*args));
1551                         return 0;
1552                 }
1553                 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1554         }
1555
1556         res = vmalloc(bufsize);
1557         if (!res)
1558                 return -ENOMEM;
1559
1560         args->start_gfn = cur;
1561
1562         down_read(&kvm->mm->mmap_sem);
1563         srcu_idx = srcu_read_lock(&kvm->srcu);
1564         while (i < bufsize) {
1565                 hva = gfn_to_hva(kvm, cur);
1566                 if (kvm_is_error_hva(hva)) {
1567                         r = -EFAULT;
1568                         break;
1569                 }
1570                 /* decrement only if we actually flipped the bit to 0 */
1571                 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1572                         atomic64_dec(&s->dirty_pages);
1573                 r = get_pgste(kvm->mm, hva, &pgstev);
1574                 if (r < 0)
1575                         pgstev = 0;
1576                 /* save the usage state and the NODAT bit of the PGSTE */
1577                 res[i++] = (pgstev >> 24) & 0x43;
1578                 /*
1579                  * if the next bit is too far away, stop.
1580                  * if we reached the previous "next", find the next one
1581                  */
1582                 if (!peek) {
1583                         if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1584                                 break;
1585                         if (cur == next)
1586                                 next = find_next_bit(s->pgste_bitmap,
1587                                                      s->bitmap_size, cur + 1);
1588                         /* reached the end of the bitmap or of the buffer, stop */
1589                         if ((next >= s->bitmap_size) ||
1590                             (next >= args->start_gfn + bufsize))
1591                                 break;
1592                 }
1593                 cur++;
1594         }
1595         srcu_read_unlock(&kvm->srcu, srcu_idx);
1596         up_read(&kvm->mm->mmap_sem);
1597         args->count = i;
1598         args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1599
1600         rr = copy_to_user((void __user *)args->values, res, args->count);
1601         if (rr)
1602                 r = -EFAULT;
1603
1604         vfree(res);
1605         return r;
1606 }
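/*
 * Illustrative userspace sketch (hypothetical vm_fd, buf and buflen, and
 * not part of this file; assumes migration mode was enabled through the
 * KVM_S390_VM_MIGRATION attribute): draining the dirty CMMA log until no
 * dirty pages remain.
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = buflen,
 *		.flags = 0,
 *		.values = (uint64_t)(uintptr_t)buf,
 *	};
 *	do {
 *		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
 *			break;
 *		// buf now holds log.count values for guest frames
 *		// log.start_gfn .. log.start_gfn + log.count - 1
 *		log.start_gfn += log.count;
 *		log.count = buflen;
 *	} while (log.remaining);
 */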
1607
1608 /*
1609  * This function sets the CMMA attributes for the given pages. If the input
1610  * buffer has zero length, no action is taken, otherwise the attributes are
1611  * set and the mm->context.use_cmma flag is set.
1612  */
1613 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1614                                   const struct kvm_s390_cmma_log *args)
1615 {
1616         unsigned long hva, mask, pgstev, i;
1617         uint8_t *bits;
1618         int srcu_idx, r = 0;
1619
1620         mask = args->mask & (_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
1621
1622         if (!kvm->arch.use_cmma)
1623                 return -ENXIO;
1624         /* invalid/unsupported flags */
1625         if (args->flags != 0)
1626                 return -EINVAL;
1627         /* Enforce sane limit on memory allocation */
1628         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1629                 return -EINVAL;
1630         /* Nothing to do */
1631         if (args->count == 0)
1632                 return 0;
1633
1634         bits = vmalloc(sizeof(*bits) * args->count);
1635         if (!bits)
1636                 return -ENOMEM;
1637
1638         r = copy_from_user(bits, (void __user *)args->values, args->count);
1639         if (r) {
1640                 r = -EFAULT;
1641                 goto out;
1642         }
1643
1644         down_read(&kvm->mm->mmap_sem);
1645         srcu_idx = srcu_read_lock(&kvm->srcu);
1646         for (i = 0; i < args->count; i++) {
1647                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1648                 if (kvm_is_error_hva(hva)) {
1649                         r = -EFAULT;
1650                         break;
1651                 }
1652
1653                 pgstev = bits[i];
1654                 pgstev = pgstev << 24;
1656                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1657         }
1658         srcu_read_unlock(&kvm->srcu, srcu_idx);
1659         up_read(&kvm->mm->mmap_sem);
1660
1661         if (!kvm->mm->context.use_cmma) {
1662                 down_write(&kvm->mm->mmap_sem);
1663                 kvm->mm->context.use_cmma = 1;
1664                 up_write(&kvm->mm->mmap_sem);
1665         }
1666 out:
1667         vfree(bits);
1668         return r;
1669 }
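/*
 * Illustrative destination-side sketch (hypothetical vm_fd, gfn, n, and buf
 * holding values produced by KVM_S390_GET_CMMA_BITS on the source; not part
 * of this file):
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = gfn,
 *		.count = n,
 *		.flags = 0,
 *		.mask = ~0ULL,
 *		.values = (uint64_t)(uintptr_t)buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &log);
 *
 * The kernel restricts the mask to the usage state and NODAT bits (see
 * above), so passing ~0ULL simply updates everything that may be set.
 */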
1670
1671 long kvm_arch_vm_ioctl(struct file *filp,
1672                        unsigned int ioctl, unsigned long arg)
1673 {
1674         struct kvm *kvm = filp->private_data;
1675         void __user *argp = (void __user *)arg;
1676         struct kvm_device_attr attr;
1677         int r;
1678
1679         switch (ioctl) {
1680         case KVM_S390_INTERRUPT: {
1681                 struct kvm_s390_interrupt s390int;
1682
1683                 r = -EFAULT;
1684                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1685                         break;
1686                 r = kvm_s390_inject_vm(kvm, &s390int);
1687                 break;
1688         }
1689         case KVM_ENABLE_CAP: {
1690                 struct kvm_enable_cap cap;

1691                 r = -EFAULT;
1692                 if (copy_from_user(&cap, argp, sizeof(cap)))
1693                         break;
1694                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1695                 break;
1696         }
1697         case KVM_CREATE_IRQCHIP: {
1698                 struct kvm_irq_routing_entry routing;
1699
1700                 r = -EINVAL;
1701                 if (kvm->arch.use_irqchip) {
1702                         /* Set up dummy routing. */
1703                         memset(&routing, 0, sizeof(routing));
1704                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1705                 }
1706                 break;
1707         }
1708         case KVM_SET_DEVICE_ATTR: {
1709                 r = -EFAULT;
1710                 if (copy_from_user(&attr, argp, sizeof(attr)))
1711                         break;
1712                 r = kvm_s390_vm_set_attr(kvm, &attr);
1713                 break;
1714         }
1715         case KVM_GET_DEVICE_ATTR: {
1716                 r = -EFAULT;
1717                 if (copy_from_user(&attr, argp, sizeof(attr)))
1718                         break;
1719                 r = kvm_s390_vm_get_attr(kvm, &attr);
1720                 break;
1721         }
1722         case KVM_HAS_DEVICE_ATTR: {
1723                 r = -EFAULT;
1724                 if (copy_from_user(&attr, argp, sizeof(attr)))
1725                         break;
1726                 r = kvm_s390_vm_has_attr(kvm, &attr);
1727                 break;
1728         }
1729         case KVM_S390_GET_SKEYS: {
1730                 struct kvm_s390_skeys args;
1731
1732                 r = -EFAULT;
1733                 if (copy_from_user(&args, argp,
1734                                    sizeof(struct kvm_s390_skeys)))
1735                         break;
1736                 r = kvm_s390_get_skeys(kvm, &args);
1737                 break;
1738         }
1739         case KVM_S390_SET_SKEYS: {
1740                 struct kvm_s390_skeys args;
1741
1742                 r = -EFAULT;
1743                 if (copy_from_user(&args, argp,
1744                                    sizeof(struct kvm_s390_skeys)))
1745                         break;
1746                 r = kvm_s390_set_skeys(kvm, &args);
1747                 break;
1748         }
1749         case KVM_S390_GET_CMMA_BITS: {
1750                 struct kvm_s390_cmma_log args;
1751
1752                 r = -EFAULT;
1753                 if (copy_from_user(&args, argp, sizeof(args)))
1754                         break;
1755                 r = kvm_s390_get_cmma_bits(kvm, &args);
1756                 if (!r) {
1757                         r = copy_to_user(argp, &args, sizeof(args));
1758                         if (r)
1759                                 r = -EFAULT;
1760                 }
1761                 break;
1762         }
1763         case KVM_S390_SET_CMMA_BITS: {
1764                 struct kvm_s390_cmma_log args;
1765
1766                 r = -EFAULT;
1767                 if (copy_from_user(&args, argp, sizeof(args)))
1768                         break;
1769                 r = kvm_s390_set_cmma_bits(kvm, &args);
1770                 break;
1771         }
1772         default:
1773                 r = -ENOTTY;
1774         }
1775
1776         return r;
1777 }
1778
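/*
 * Query the AP (adjunct processor) configuration with the PQAP(QCI)
 * instruction. The result is stored in the 128-byte block at @config;
 * the condition code of the instruction is returned, 0 on success.
 */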
1779 static int kvm_s390_query_ap_config(u8 *config)
1780 {
1781         u32 fcn_code = 0x04000000UL;
1782         u32 cc = 0;
1783
1784         memset(config, 0, 128);
1785         asm volatile(
1786                 "lgr 0,%1\n"
1787                 "lgr 2,%2\n"
1788                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1789                 "0: ipm %0\n"
1790                 "srl %0,28\n"
1791                 "1:\n"
1792                 EX_TABLE(0b, 1b)
1793                 : "+r" (cc)
1794                 : "r" (fcn_code), "r" (config)
1795                 : "cc", "0", "2", "memory"
1796         );
1797
1798         return cc;
1799 }
1800
1801 static int kvm_s390_apxa_installed(void)
1802 {
1803         u8 config[128];
1804         int cc;
1805
1806         if (test_facility(12)) {
1807                 cc = kvm_s390_query_ap_config(config);
1808
1809                 if (cc)
1810                         pr_err("PQAP(QCI) failed with cc=%d\n", cc);
1811                 else
1812                         return config[0] & 0x40;        /* APXA facility bit */
1813         }
1814
1815         return 0;
1816 }
1817
1818 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1819 {
1820         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1821
1822         if (kvm_s390_apxa_installed())
1823                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1824         else
1825                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1826 }
1827
1828 static u64 kvm_s390_get_initial_cpuid(void)
1829 {
1830         struct cpuid cpuid;
1831
1832         get_cpu_id(&cpuid);
1833         cpuid.version = 0xff;
1834         return *((u64 *) &cpuid);
1835 }
1836
1837 static void kvm_s390_crypto_init(struct kvm *kvm)
1838 {
1839         if (!test_kvm_facility(kvm, 76))
1840                 return;
1841
1842         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1843         kvm_s390_set_crycb_format(kvm);
1844
1845         /* Enable AES/DEA protected key functions by default */
1846         kvm->arch.crypto.aes_kw = 1;
1847         kvm->arch.crypto.dea_kw = 1;
1848         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1849                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1850         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1851                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1852 }
1853
1854 static void sca_dispose(struct kvm *kvm)
1855 {
1856         if (kvm->arch.use_esca)
1857                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1858         else
1859                 free_page((unsigned long)(kvm->arch.sca));
1860         kvm->arch.sca = NULL;
1861 }
1862
1863 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1864 {
1865         gfp_t alloc_flags = GFP_KERNEL;
1866         int i, rc;
1867         char debug_name[16];
1868         static unsigned long sca_offset;
1869
1870         rc = -EINVAL;
1871 #ifdef CONFIG_KVM_S390_UCONTROL
1872         if (type & ~KVM_VM_S390_UCONTROL)
1873                 goto out_err;
1874         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1875                 goto out_err;
1876 #else
1877         if (type)
1878                 goto out_err;
1879 #endif
1880
1881         rc = s390_enable_sie();
1882         if (rc)
1883                 goto out_err;
1884
1885         rc = -ENOMEM;
1886
1887         kvm->arch.use_esca = 0; /* start with basic SCA */
1888         if (!sclp.has_64bscao)
1889                 alloc_flags |= GFP_DMA;
1890         rwlock_init(&kvm->arch.sca_lock);
1891         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1892         if (!kvm->arch.sca)
1893                 goto out_err;
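        /*
         * sca_offset is shared between all VMs (hence the kvm_lock); the
         * staggering by 16 bytes presumably spreads the SCAs of different
         * VMs across cache lines.
         */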
1894         spin_lock(&kvm_lock);
1895         sca_offset += 16;
1896         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1897                 sca_offset = 0;
1898         kvm->arch.sca = (struct bsca_block *)
1899                         ((char *) kvm->arch.sca + sca_offset);
1900         spin_unlock(&kvm_lock);
1901
1902         sprintf(debug_name, "kvm-%u", current->pid);
1903
1904         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1905         if (!kvm->arch.dbf)
1906                 goto out_err;
1907
1908         kvm->arch.sie_page2 =
1909              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1910         if (!kvm->arch.sie_page2)
1911                 goto out_err;
1912
1913         /* Populate the facility mask initially. */
1914         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1915                sizeof(S390_lowcore.stfle_fac_list));
1916         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1917                 if (i < kvm_s390_fac_list_mask_size())
1918                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1919                 else
1920                         kvm->arch.model.fac_mask[i] = 0UL;
1921         }
1922
1923         /* Populate the facility list initially. */
1924         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1925         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1926                S390_ARCH_FAC_LIST_SIZE_BYTE);
1927
1928         /* we are always in czam mode - even on pre-z14 machines */
1929         set_kvm_facility(kvm->arch.model.fac_mask, 138);
1930         set_kvm_facility(kvm->arch.model.fac_list, 138);
1931         /* we emulate STHYI in kvm */
1932         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1933         set_kvm_facility(kvm->arch.model.fac_list, 74);
1934         if (MACHINE_HAS_TLB_GUEST) {
1935                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
1936                 set_kvm_facility(kvm->arch.model.fac_list, 147);
1937         }
1938
1939         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1940         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1941
1942         kvm_s390_crypto_init(kvm);
1943
1944         mutex_init(&kvm->arch.float_int.ais_lock);
1945         kvm->arch.float_int.simm = 0;
1946         kvm->arch.float_int.nimm = 0;
1947         spin_lock_init(&kvm->arch.float_int.lock);
1948         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1949                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1950         init_waitqueue_head(&kvm->arch.ipte_wq);
1951         mutex_init(&kvm->arch.ipte_mutex);
1952
1953         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1954         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1955
1956         if (type & KVM_VM_S390_UCONTROL) {
1957                 kvm->arch.gmap = NULL;
1958                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1959         } else {
1960                 if (sclp.hamax == U64_MAX)
1961                         kvm->arch.mem_limit = TASK_SIZE_MAX;
1962                 else
1963                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1964                                                     sclp.hamax + 1);
1965                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1966                 if (!kvm->arch.gmap)
1967                         goto out_err;
1968                 kvm->arch.gmap->private = kvm;
1969                 kvm->arch.gmap->pfault_enabled = 0;
1970         }
1971
1972         kvm->arch.css_support = 0;
1973         kvm->arch.use_irqchip = 0;
1974         kvm->arch.epoch = 0;
1975
1976         spin_lock_init(&kvm->arch.start_stop_lock);
1977         kvm_s390_vsie_init(kvm);
1978         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1979
1980         return 0;
1981 out_err:
1982         free_page((unsigned long)kvm->arch.sie_page2);
1983         debug_unregister(kvm->arch.dbf);
1984         sca_dispose(kvm);
1985         KVM_EVENT(3, "creation of vm failed: %d", rc);
1986         return rc;
1987 }
1988
1989 bool kvm_arch_has_vcpu_debugfs(void)
1990 {
1991         return false;
1992 }
1993
1994 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1995 {
1996         return 0;
1997 }
1998
1999 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2000 {
2001         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2002         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2003         kvm_s390_clear_local_irqs(vcpu);
2004         kvm_clear_async_pf_completion_queue(vcpu);
2005         if (!kvm_is_ucontrol(vcpu->kvm))
2006                 sca_del_vcpu(vcpu);
2007
2008         if (kvm_is_ucontrol(vcpu->kvm))
2009                 gmap_remove(vcpu->arch.gmap);
2010
2011         if (vcpu->kvm->arch.use_cmma)
2012                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2013         free_page((unsigned long)(vcpu->arch.sie_block));
2014
2015         kvm_vcpu_uninit(vcpu);
2016         kmem_cache_free(kvm_vcpu_cache, vcpu);
2017 }
2018
2019 static void kvm_free_vcpus(struct kvm *kvm)
2020 {
2021         unsigned int i;
2022         struct kvm_vcpu *vcpu;
2023
2024         kvm_for_each_vcpu(i, vcpu, kvm)
2025                 kvm_arch_vcpu_destroy(vcpu);
2026
2027         mutex_lock(&kvm->lock);
2028         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2029                 kvm->vcpus[i] = NULL;
2030
2031         atomic_set(&kvm->online_vcpus, 0);
2032         mutex_unlock(&kvm->lock);
2033 }
2034
2035 void kvm_arch_destroy_vm(struct kvm *kvm)
2036 {
2037         kvm_free_vcpus(kvm);
2038         sca_dispose(kvm);
2039         debug_unregister(kvm->arch.dbf);
2040         free_page((unsigned long)kvm->arch.sie_page2);
2041         if (!kvm_is_ucontrol(kvm))
2042                 gmap_remove(kvm->arch.gmap);
2043         kvm_s390_destroy_adapters(kvm);
2044         kvm_s390_clear_float_irqs(kvm);
2045         kvm_s390_vsie_destroy(kvm);
2046         if (kvm->arch.migration_state) {
2047                 vfree(kvm->arch.migration_state->pgste_bitmap);
2048                 kfree(kvm->arch.migration_state);
2049         }
2050         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2051 }
2052
2053 /* Section: vcpu related */
2054 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2055 {
2056         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2057         if (!vcpu->arch.gmap)
2058                 return -ENOMEM;
2059         vcpu->arch.gmap->private = vcpu->kvm;
2060
2061         return 0;
2062 }
2063
2064 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2065 {
2066         if (!kvm_s390_use_sca_entries())
2067                 return;
2068         read_lock(&vcpu->kvm->arch.sca_lock);
2069         if (vcpu->kvm->arch.use_esca) {
2070                 struct esca_block *sca = vcpu->kvm->arch.sca;
2071
2072                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2073                 sca->cpu[vcpu->vcpu_id].sda = 0;
2074         } else {
2075                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2076
2077                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2078                 sca->cpu[vcpu->vcpu_id].sda = 0;
2079         }
2080         read_unlock(&vcpu->kvm->arch.sca_lock);
2081 }
2082
2083 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2084 {
2085         if (!kvm_s390_use_sca_entries()) {
2086                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2087
2088                 /* we still need the basic sca for the ipte control */
2089                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2090                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2091         }
2092         read_lock(&vcpu->kvm->arch.sca_lock);
2093         if (vcpu->kvm->arch.use_esca) {
2094                 struct esca_block *sca = vcpu->kvm->arch.sca;
2095
2096                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2097                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2098                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2099                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2100                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2101         } else {
2102                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2103
2104                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2105                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2106                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2107                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2108         }
2109         read_unlock(&vcpu->kvm->arch.sca_lock);
2110 }
2111
2112 /* Basic SCA to Extended SCA data copy routines */
2113 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2114 {
2115         d->sda = s->sda;
2116         d->sigp_ctrl.c = s->sigp_ctrl.c;
2117         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2118 }
2119
2120 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2121 {
2122         int i;
2123
2124         d->ipte_control = s->ipte_control;
2125         d->mcn[0] = s->mcn;
2126         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2127                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2128 }
2129
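/*
 * Replace the basic SCA by an extended SCA. All vcpus are blocked and
 * kicked out of SIE while the SCA origin in each SIE control block is
 * rewritten, so no vcpu can run with a stale SCA address.
 */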
2130 static int sca_switch_to_extended(struct kvm *kvm)
2131 {
2132         struct bsca_block *old_sca = kvm->arch.sca;
2133         struct esca_block *new_sca;
2134         struct kvm_vcpu *vcpu;
2135         unsigned int vcpu_idx;
2136         u32 scaol, scaoh;
2137
2138         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2139         if (!new_sca)
2140                 return -ENOMEM;
2141
2142         scaoh = (u32)((u64)(new_sca) >> 32);
2143         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2144
2145         kvm_s390_vcpu_block_all(kvm);
2146         write_lock(&kvm->arch.sca_lock);
2147
2148         sca_copy_b_to_e(new_sca, old_sca);
2149
2150         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2151                 vcpu->arch.sie_block->scaoh = scaoh;
2152                 vcpu->arch.sie_block->scaol = scaol;
2153                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2154         }
2155         kvm->arch.sca = new_sca;
2156         kvm->arch.use_esca = 1;
2157
2158         write_unlock(&kvm->arch.sca_lock);
2159         kvm_s390_vcpu_unblock_all(kvm);
2160
2161         free_page((unsigned long)old_sca);
2162
2163         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2164                  old_sca, kvm->arch.sca);
2165         return 0;
2166 }
2167
2168 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2169 {
2170         int rc;
2171
2172         if (!kvm_s390_use_sca_entries()) {
2173                 if (id < KVM_MAX_VCPUS)
2174                         return true;
2175                 return false;
2176         }
2177         if (id < KVM_S390_BSCA_CPU_SLOTS)
2178                 return true;
2179         if (!sclp.has_esca || !sclp.has_64bscao)
2180                 return false;
2181
2182         mutex_lock(&kvm->lock);
2183         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2184         mutex_unlock(&kvm->lock);
2185
2186         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2187 }
2188
2189 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2190 {
2191         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2192         kvm_clear_async_pf_completion_queue(vcpu);
2193         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2194                                     KVM_SYNC_GPRS |
2195                                     KVM_SYNC_ACRS |
2196                                     KVM_SYNC_CRS |
2197                                     KVM_SYNC_ARCH0 |
2198                                     KVM_SYNC_PFAULT;
2199         kvm_s390_set_prefix(vcpu, 0);
2200         if (test_kvm_facility(vcpu->kvm, 64))
2201                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2202         if (test_kvm_facility(vcpu->kvm, 133))
2203                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2204         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2205          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2206          */
2207         if (MACHINE_HAS_VX)
2208                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2209         else
2210                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2211
2212         if (kvm_is_ucontrol(vcpu->kvm))
2213                 return __kvm_ucontrol_vcpu_init(vcpu);
2214
2215         return 0;
2216 }
2217
2218 /* caller must disable preemption to protect from TOD sync and vcpu_load/put */
2219 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2220 {
2221         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2222         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2223         vcpu->arch.cputm_start = get_tod_clock_fast();
2224         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2225 }
2226
2227 /* caller must disable preemption to protect from TOD sync and vcpu_load/put */
2228 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2229 {
2230         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2231         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2232         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2233         vcpu->arch.cputm_start = 0;
2234         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2235 }
2236
2237 /* caller must disable preemption to protect from TOD sync and vcpu_load/put */
2238 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2239 {
2240         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2241         vcpu->arch.cputm_enabled = true;
2242         __start_cpu_timer_accounting(vcpu);
2243 }
2244
2245 /* caller must disable preemption to protect from TOD sync and vcpu_load/put */
2246 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2247 {
2248         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2249         __stop_cpu_timer_accounting(vcpu);
2250         vcpu->arch.cputm_enabled = false;
2251 }
2252
2253 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2254 {
2255         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2256         __enable_cpu_timer_accounting(vcpu);
2257         preempt_enable();
2258 }
2259
2260 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2261 {
2262         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2263         __disable_cpu_timer_accounting(vcpu);
2264         preempt_enable();
2265 }
2266
2267 /* set the cpu timer - may only be called from the VCPU thread itself */
2268 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2269 {
2270         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2271         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2272         if (vcpu->arch.cputm_enabled)
2273                 vcpu->arch.cputm_start = get_tod_clock_fast();
2274         vcpu->arch.sie_block->cputm = cputm;
2275         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2276         preempt_enable();
2277 }
2278
2279 /* update and get the cpu timer - can also be called from other VCPU threads */
2280 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2281 {
2282         unsigned int seq;
2283         __u64 value;
2284
2285         if (unlikely(!vcpu->arch.cputm_enabled))
2286                 return vcpu->arch.sie_block->cputm;
2287
2288         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2289         do {
2290                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2291                 /*
2292                  * If the writer would ever execute a read in the critical
2293                  * section, e.g. in irq context, we have a deadlock.
2294                  */
2295                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2296                 value = vcpu->arch.sie_block->cputm;
2297                 /* if cputm_start is 0, accounting is being started/stopped */
2298                 if (likely(vcpu->arch.cputm_start))
2299                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
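                /*
                 * seq is rounded down to an even value below: if a write
                 * was in progress (odd seq), read_seqcount_retry() sees a
                 * mismatch and we loop until we get a stable snapshot.
                 */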
2300         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2301         preempt_enable();
2302         return value;
2303 }
2304
2305 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2306 {
2308         gmap_enable(vcpu->arch.enabled_gmap);
2309         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2310         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2311                 __start_cpu_timer_accounting(vcpu);
2312         vcpu->cpu = cpu;
2313 }
2314
2315 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2316 {
2317         vcpu->cpu = -1;
2318         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2319                 __stop_cpu_timer_accounting(vcpu);
2320         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2321         vcpu->arch.enabled_gmap = gmap_get_enabled();
2322         gmap_disable(vcpu->arch.enabled_gmap);
2324 }
2325
2326 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2327 {
2328         /* this equals the initial cpu reset in the PoP, but we don't switch to ESA */
2329         vcpu->arch.sie_block->gpsw.mask = 0UL;
2330         vcpu->arch.sie_block->gpsw.addr = 0UL;
2331         kvm_s390_set_prefix(vcpu, 0);
2332         kvm_s390_set_cpu_timer(vcpu, 0);
2333         vcpu->arch.sie_block->ckc       = 0UL;
2334         vcpu->arch.sie_block->todpr     = 0;
2335         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2336         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2337         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2338         /* make sure the new fpc will be lazily loaded */
2339         save_fpu_regs();
2340         current->thread.fpu.fpc = 0;
2341         vcpu->arch.sie_block->gbea = 1;
2342         vcpu->arch.sie_block->pp = 0;
2343         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2344         kvm_clear_async_pf_completion_queue(vcpu);
2345         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2346                 kvm_s390_vcpu_stop(vcpu);
2347         kvm_s390_clear_local_irqs(vcpu);
2348 }
2349
2350 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2351 {
2352         mutex_lock(&vcpu->kvm->lock);
2353         preempt_disable();
2354         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2355         preempt_enable();
2356         mutex_unlock(&vcpu->kvm->lock);
2357         if (!kvm_is_ucontrol(vcpu->kvm)) {
2358                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2359                 sca_add_vcpu(vcpu);
2360         }
2361         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2362                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2363         /* make vcpu_load load the right gmap on the first trigger */
2364         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2365 }
2366
2367 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2368 {
2369         if (!test_kvm_facility(vcpu->kvm, 76))
2370                 return;
2371
2372         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2373
2374         if (vcpu->kvm->arch.crypto.aes_kw)
2375                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2376         if (vcpu->kvm->arch.crypto.dea_kw)
2377                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2378
2379         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2380 }
2381
2382 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2383 {
2384         free_page(vcpu->arch.sie_block->cbrlo);
2385         vcpu->arch.sie_block->cbrlo = 0;
2386 }
2387
2388 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2389 {
2390         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2391         if (!vcpu->arch.sie_block->cbrlo)
2392                 return -ENOMEM;
2393
2394         vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2395         return 0;
2396 }
2397
2398 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2399 {
2400         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2401
2402         vcpu->arch.sie_block->ibc = model->ibc;
2403         if (test_kvm_facility(vcpu->kvm, 7))
2404                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2405 }
2406
2407 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2408 {
2409         int rc = 0;
2410
2411         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2412                                                     CPUSTAT_SM |
2413                                                     CPUSTAT_STOPPED);
2414
2415         if (test_kvm_facility(vcpu->kvm, 78))
2416                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2417         else if (test_kvm_facility(vcpu->kvm, 8))
2418                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2419
2420         kvm_s390_vcpu_setup_model(vcpu);
2421
2422         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2423         if (MACHINE_HAS_ESOP)
2424                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2425         if (test_kvm_facility(vcpu->kvm, 9))
2426                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2427         if (test_kvm_facility(vcpu->kvm, 73))
2428                 vcpu->arch.sie_block->ecb |= ECB_TE;
2429
2430         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2431                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2432         if (test_kvm_facility(vcpu->kvm, 130))
2433                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2434         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2435         if (sclp.has_cei)
2436                 vcpu->arch.sie_block->eca |= ECA_CEI;
2437         if (sclp.has_ib)
2438                 vcpu->arch.sie_block->eca |= ECA_IB;
2439         if (sclp.has_siif)
2440                 vcpu->arch.sie_block->eca |= ECA_SII;
2441         if (sclp.has_sigpif)
2442                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2443         if (test_kvm_facility(vcpu->kvm, 129)) {
2444                 vcpu->arch.sie_block->eca |= ECA_VX;
2445                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2446         }
2447         if (test_kvm_facility(vcpu->kvm, 139))
2448                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2449
2450         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2451                                         | SDNXC;
2452         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2453
2454         if (sclp.has_kss)
2455                 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2456         else
2457                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2458
2459         if (vcpu->kvm->arch.use_cmma) {
2460                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2461                 if (rc)
2462                         return rc;
2463         }
2464         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2465         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2466
2467         kvm_s390_vcpu_crypto_setup(vcpu);
2468
2469         return rc;
2470 }
2471
2472 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2473                                       unsigned int id)
2474 {
2475         struct kvm_vcpu *vcpu;
2476         struct sie_page *sie_page;
2477         int rc = -EINVAL;
2478
2479         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2480                 goto out;
2481
2482         rc = -ENOMEM;
2483
2484         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2485         if (!vcpu)
2486                 goto out;
2487
2488         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2489         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2490         if (!sie_page)
2491                 goto out_free_cpu;
2492
2493         vcpu->arch.sie_block = &sie_page->sie_block;
2494         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2495
2496         /* the real guest size will always be smaller than msl */
2497         vcpu->arch.sie_block->mso = 0;
2498         vcpu->arch.sie_block->msl = sclp.hamax;
2499
2500         vcpu->arch.sie_block->icpua = id;
2501         spin_lock_init(&vcpu->arch.local_int.lock);
2502         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2503         vcpu->arch.local_int.wq = &vcpu->wq;
2504         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2505         seqcount_init(&vcpu->arch.cputm_seqcount);
2506
2507         rc = kvm_vcpu_init(vcpu, kvm, id);
2508         if (rc)
2509                 goto out_free_sie_block;
2510         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2511                  vcpu->arch.sie_block);
2512         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2513
2514         return vcpu;
2515 out_free_sie_block:
2516         free_page((unsigned long)(vcpu->arch.sie_block));
2517 out_free_cpu:
2518         kmem_cache_free(kvm_vcpu_cache, vcpu);
2519 out:
2520         return ERR_PTR(rc);
2521 }
2522
2523 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2524 {
2525         return kvm_s390_vcpu_has_irq(vcpu, 0);
2526 }
2527
2528 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2529 {
2530         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2531 }
2532
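/*
 * Ask a vcpu to stay out of SIE: the PROG_BLOCK_SIE flag is checked before
 * (re)entering SIE, and exit_sie() kicks a currently running vcpu out, so a
 * blocked vcpu cannot (re)enter SIE until it is unblocked.
 */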
2533 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2534 {
2535         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2536         exit_sie(vcpu);
2537 }
2538
2539 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2540 {
2541         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2542 }
2543
2544 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2545 {
2546         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2547         exit_sie(vcpu);
2548 }
2549
2550 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2551 {
2552         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2553 }
2554
2555 /*
2556  * Kick a guest cpu out of SIE and wait until SIE is not running.
2557  * If the CPU is not running (e.g. waiting as idle) the function will
2558  * return immediately.
 */
2559 void exit_sie(struct kvm_vcpu *vcpu)
2560 {
2561         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2562         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2563                 cpu_relax();
2564 }
2565
2566 /* Kick a guest cpu out of SIE to process a request synchronously */
2567 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2568 {
2569         kvm_make_request(req, vcpu);
2570         kvm_s390_vcpu_request(vcpu);
2571 }
2572
2573 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2574                               unsigned long end)
2575 {
2576         struct kvm *kvm = gmap->private;
2577         struct kvm_vcpu *vcpu;
2578         unsigned long prefix;
2579         int i;
2580
2581         if (gmap_is_shadow(gmap))
2582                 return;
2583         if (start >= 1UL << 31)
2584                 /* We are only interested in prefix pages */
2585                 return;
2586         kvm_for_each_vcpu(i, vcpu, kvm) {
2587                 /* match against both prefix pages */
2588                 prefix = kvm_s390_get_prefix(vcpu);
2589                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2590                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2591                                    start, end);
2592                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2593                 }
2594         }
2595 }
2596
2597 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2598 {
2599         /* kvm common code refers to this, but never calls it */
2600         BUG();
2601         return 0;
2602 }
2603
2604 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2605                                            struct kvm_one_reg *reg)
2606 {
2607         int r = -EINVAL;
2608
2609         switch (reg->id) {
2610         case KVM_REG_S390_TODPR:
2611                 r = put_user(vcpu->arch.sie_block->todpr,
2612                              (u32 __user *)reg->addr);
2613                 break;
2614         case KVM_REG_S390_EPOCHDIFF:
2615                 r = put_user(vcpu->arch.sie_block->epoch,
2616                              (u64 __user *)reg->addr);
2617                 break;
2618         case KVM_REG_S390_CPU_TIMER:
2619                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2620                              (u64 __user *)reg->addr);
2621                 break;
2622         case KVM_REG_S390_CLOCK_COMP:
2623                 r = put_user(vcpu->arch.sie_block->ckc,
2624                              (u64 __user *)reg->addr);
2625                 break;
2626         case KVM_REG_S390_PFTOKEN:
2627                 r = put_user(vcpu->arch.pfault_token,
2628                              (u64 __user *)reg->addr);
2629                 break;
2630         case KVM_REG_S390_PFCOMPARE:
2631                 r = put_user(vcpu->arch.pfault_compare,
2632                              (u64 __user *)reg->addr);
2633                 break;
2634         case KVM_REG_S390_PFSELECT:
2635                 r = put_user(vcpu->arch.pfault_select,
2636                              (u64 __user *)reg->addr);
2637                 break;
2638         case KVM_REG_S390_PP:
2639                 r = put_user(vcpu->arch.sie_block->pp,
2640                              (u64 __user *)reg->addr);
2641                 break;
2642         case KVM_REG_S390_GBEA:
2643                 r = put_user(vcpu->arch.sie_block->gbea,
2644                              (u64 __user *)reg->addr);
2645                 break;
2646         default:
2647                 break;
2648         }
2649
2650         return r;
2651 }
2652
2653 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2654                                            struct kvm_one_reg *reg)
2655 {
2656         int r = -EINVAL;
2657         __u64 val;
2658
2659         switch (reg->id) {
2660         case KVM_REG_S390_TODPR:
2661                 r = get_user(vcpu->arch.sie_block->todpr,
2662                              (u32 __user *)reg->addr);
2663                 break;
2664         case KVM_REG_S390_EPOCHDIFF:
2665                 r = get_user(vcpu->arch.sie_block->epoch,
2666                              (u64 __user *)reg->addr);
2667                 break;
2668         case KVM_REG_S390_CPU_TIMER:
2669                 r = get_user(val, (u64 __user *)reg->addr);
2670                 if (!r)
2671                         kvm_s390_set_cpu_timer(vcpu, val);
2672                 break;
2673         case KVM_REG_S390_CLOCK_COMP:
2674                 r = get_user(vcpu->arch.sie_block->ckc,
2675                              (u64 __user *)reg->addr);
2676                 break;
2677         case KVM_REG_S390_PFTOKEN:
2678                 r = get_user(vcpu->arch.pfault_token,
2679                              (u64 __user *)reg->addr);
2680                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2681                         kvm_clear_async_pf_completion_queue(vcpu);
2682                 break;
2683         case KVM_REG_S390_PFCOMPARE:
2684                 r = get_user(vcpu->arch.pfault_compare,
2685                              (u64 __user *)reg->addr);
2686                 break;
2687         case KVM_REG_S390_PFSELECT:
2688                 r = get_user(vcpu->arch.pfault_select,
2689                              (u64 __user *)reg->addr);
2690                 break;
2691         case KVM_REG_S390_PP:
2692                 r = get_user(vcpu->arch.sie_block->pp,
2693                              (u64 __user *)reg->addr);
2694                 break;
2695         case KVM_REG_S390_GBEA:
2696                 r = get_user(vcpu->arch.sie_block->gbea,
2697                              (u64 __user *)reg->addr);
2698                 break;
2699         default:
2700                 break;
2701         }
2702
2703         return r;
2704 }
2705
2706 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2707 {
2708         kvm_s390_vcpu_initial_reset(vcpu);
2709         return 0;
2710 }
2711
2712 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2713 {
2714         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2715         return 0;
2716 }
2717
2718 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2719 {
2720         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2721         return 0;
2722 }
2723
2724 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2725                                   struct kvm_sregs *sregs)
2726 {
2727         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2728         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2729         return 0;
2730 }
2731
2732 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2733                                   struct kvm_sregs *sregs)
2734 {
2735         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2736         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2737         return 0;
2738 }
2739
2740 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2741 {
2742         if (test_fp_ctl(fpu->fpc))
2743                 return -EINVAL;
2744         vcpu->run->s.regs.fpc = fpu->fpc;
2745         if (MACHINE_HAS_VX)
2746                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2747                                  (freg_t *) fpu->fprs);
2748         else
2749                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2750         return 0;
2751 }
2752
2753 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2754 {
2755         /* make sure we have the latest values */
2756         save_fpu_regs();
2757         if (MACHINE_HAS_VX)
2758                 convert_vx_to_fp((freg_t *) fpu->fprs,
2759                                  (__vector128 *) vcpu->run->s.regs.vrs);
2760         else
2761                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2762         fpu->fpc = vcpu->run->s.regs.fpc;
2763         return 0;
2764 }
2765
2766 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2767 {
2768         int rc = 0;
2769
2770         if (!is_vcpu_stopped(vcpu))
2771                 rc = -EBUSY;
2772         else {
2773                 vcpu->run->psw_mask = psw.mask;
2774                 vcpu->run->psw_addr = psw.addr;
2775         }
2776         return rc;
2777 }
2778
2779 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2780                                   struct kvm_translation *tr)
2781 {
2782         return -EINVAL; /* not implemented yet */
2783 }
2784
2785 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2786                               KVM_GUESTDBG_USE_HW_BP | \
2787                               KVM_GUESTDBG_ENABLE)
2788
2789 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2790                                         struct kvm_guest_debug *dbg)
2791 {
2792         int rc = 0;
2793
2794         vcpu->guest_debug = 0;
2795         kvm_s390_clear_bp_data(vcpu);
2796
2797         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2798                 return -EINVAL;
2799         if (!sclp.has_gpere)
2800                 return -EINVAL;
2801
2802         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2803                 vcpu->guest_debug = dbg->control;
2804                 /* enforce guest PER */
2805                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2806
2807                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2808                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2809         } else {
2810                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2811                 vcpu->arch.guestdbg.last_bp = 0;
2812         }
2813
2814         if (rc) {
2815                 vcpu->guest_debug = 0;
2816                 kvm_s390_clear_bp_data(vcpu);
2817                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2818         }
2819
2820         return rc;
2821 }
2822
2823 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2824                                     struct kvm_mp_state *mp_state)
2825 {
2826         /* CHECK_STOP and LOAD are not supported yet */
2827         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2828                                        KVM_MP_STATE_OPERATING;
2829 }
2830
2831 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2832                                     struct kvm_mp_state *mp_state)
2833 {
2834         int rc = 0;
2835
2836         /* user space knows about this interface - let it control the state */
2837         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2838
2839         switch (mp_state->mp_state) {
2840         case KVM_MP_STATE_STOPPED:
2841                 kvm_s390_vcpu_stop(vcpu);
2842                 break;
2843         case KVM_MP_STATE_OPERATING:
2844                 kvm_s390_vcpu_start(vcpu);
2845                 break;
2846         case KVM_MP_STATE_LOAD:
2847         case KVM_MP_STATE_CHECK_STOP:
2848                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2849         default:
2850                 rc = -ENXIO;
2851         }
2852
2853         return rc;
2854 }
2855
2856 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2857 {
2858         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2859 }
2860
2861 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2862 {
2863 retry:
2864         kvm_s390_vcpu_request_handled(vcpu);
2865         if (!kvm_request_pending(vcpu))
2866                 return 0;
2867         /*
2868          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2869          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2870          * This ensures that the ipte instruction for this request has
2871          * already finished. We might race against a second unmapper that
2872          * wants to set the blocking bit. Let's just retry the request loop.
2873          */
2874         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2875                 int rc;
2876                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2877                                           kvm_s390_get_prefix(vcpu),
2878                                           PAGE_SIZE * 2, PROT_WRITE);
2879                 if (rc) {
2880                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2881                         return rc;
2882                 }
2883                 goto retry;
2884         }
2885
2886         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2887                 vcpu->arch.sie_block->ihcpu = 0xffff;
2888                 goto retry;
2889         }
2890
2891         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2892                 if (!ibs_enabled(vcpu)) {
2893                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2894                         atomic_or(CPUSTAT_IBS,
2895                                         &vcpu->arch.sie_block->cpuflags);
2896                 }
2897                 goto retry;
2898         }
2899
2900         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2901                 if (ibs_enabled(vcpu)) {
2902                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2903                         atomic_andnot(CPUSTAT_IBS,
2904                                           &vcpu->arch.sie_block->cpuflags);
2905                 }
2906                 goto retry;
2907         }
2908
2909         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2910                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2911                 goto retry;
2912         }
2913
2914         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2915                 /*
2916                  * Disable CMMA virtualization; we will emulate the ESSA
2917                  * instruction manually, in order to provide the additional
2918                  * functionality needed for live migration.
2919                  */
2920                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2921                 goto retry;
2922         }
2923
2924         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2925                 /*
2926                  * Re-enable CMMA virtualization if CMMA is available and
2927                  * was used.
2928                  */
2929                 if ((vcpu->kvm->arch.use_cmma) &&
2930                     (vcpu->kvm->mm->context.use_cmma))
2931                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2932                 goto retry;
2933         }
2934
2935         /* nothing to do, just clear the request */
2936         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2937
2938         return 0;
2939 }
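
/*
 * Requests are raised elsewhere with kvm_make_request() or
 * kvm_s390_sync_request() (see e.g. the TLB flush request in
 * kvm_s390_vcpu_start() below) and consumed here with kvm_check_request().
 * The retry label re-runs the whole chain after each handled request, so
 * requests raised in the meantime are picked up before reentering SIE.
 */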
2940
2941 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2942                                  const struct kvm_s390_vm_tod_clock *gtod)
2943 {
2944         struct kvm_vcpu *vcpu;
2945         struct kvm_s390_tod_clock_ext htod;
2946         int i;
2947
2948         mutex_lock(&kvm->lock);
2949         preempt_disable();
2950
2951         get_tod_clock_ext((char *)&htod);
2952
2953         kvm->arch.epoch = gtod->tod - htod.tod;
2954         kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2955
2956         if (kvm->arch.epoch > gtod->tod)
2957                 kvm->arch.epdx -= 1;
2958
2959         kvm_s390_vcpu_block_all(kvm);
2960         kvm_for_each_vcpu(i, vcpu, kvm) {
2961                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2962                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
2963         }
2964
2965         kvm_s390_vcpu_unblock_all(kvm);
2966         preempt_enable();
2967         mutex_unlock(&kvm->lock);
2968 }
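
/*
 * The epoch computation above relies on wrapping u64 arithmetic: a borrow
 * out of the 64-bit TOD subtraction must be propagated into the epoch
 * index by hand. Worked example (values chosen for illustration only):
 *
 *	gtod->tod = 0x10, htod.tod = 0x20
 *	epoch = 0x10 - 0x20 = 0xfffffffffffffff0	(wrapped below zero)
 *	epoch > gtod->tod, i.e. the subtraction borrowed, so epdx -= 1
 */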
2969
2970 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2971 {
2972         struct kvm_vcpu *vcpu;
2973         int i;
2974
2975         mutex_lock(&kvm->lock);
2976         preempt_disable();
2977         kvm->arch.epoch = tod - get_tod_clock();
2978         kvm_s390_vcpu_block_all(kvm);
2979         kvm_for_each_vcpu(i, vcpu, kvm)
2980                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2981         kvm_s390_vcpu_unblock_all(kvm);
2982         preempt_enable();
2983         mutex_unlock(&kvm->lock);
2984 }
2985
2986 /**
2987  * kvm_arch_fault_in_page - fault-in guest page if necessary
2988  * @vcpu: The corresponding virtual cpu
2989  * @gpa: Guest physical address
2990  * @writable: Whether the page should be writable or not
2991  *
2992  * Make sure that a guest page has been faulted-in on the host.
2993  *
2994  * Return: Zero on success, negative error code otherwise.
2995  */
2996 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2997 {
2998         return gmap_fault(vcpu->arch.gmap, gpa,
2999                           writable ? FAULT_FLAG_WRITE : 0);
3000 }
3001
3002 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3003                                       unsigned long token)
3004 {
3005         struct kvm_s390_interrupt inti;
3006         struct kvm_s390_irq irq;
3007
3008         if (start_token) {
3009                 irq.u.ext.ext_params2 = token;
3010                 irq.type = KVM_S390_INT_PFAULT_INIT;
3011                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3012         } else {
3013                 inti.type = KVM_S390_INT_PFAULT_DONE;
3014                 inti.parm64 = token;
3015                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3016         }
3017 }
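
/*
 * The two interrupt types above implement the pfault handshake: a
 * PFAULT_INIT external interrupt carrying a token tells the guest that the
 * host cannot resolve a page fault right now and that the faulting task
 * should be suspended; a later PFAULT_DONE interrupt with the same token
 * signals that the page is available again and the task may be resumed.
 */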
3018
3019 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3020                                      struct kvm_async_pf *work)
3021 {
3022         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3023         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3024 }
3025
3026 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3027                                  struct kvm_async_pf *work)
3028 {
3029         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3030         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3031 }
3032
3033 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3034                                struct kvm_async_pf *work)
3035 {
3036         /* s390 will always inject the page directly */
3037 }
3038
3039 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3040 {
3041         /*
3042          * s390 will always inject the page directly,
3043          * but we still want check_async_completion to clean up
3044          */
3045         return true;
3046 }
3047
3048 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3049 {
3050         hva_t hva;
3051         struct kvm_arch_async_pf arch;
3052         int rc;
3053
3054         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3055                 return 0;
3056         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3057             vcpu->arch.pfault_compare)
3058                 return 0;
3059         if (psw_extint_disabled(vcpu))
3060                 return 0;
3061         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3062                 return 0;
3063         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3064                 return 0;
3065         if (!vcpu->arch.gmap->pfault_enabled)
3066                 return 0;
3067
3068         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3069         hva += current->thread.gmap_addr & ~PAGE_MASK;
3070         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3071                 return 0;
3072
3073         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3074         return rc;
3075 }
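
/*
 * Summary of the conditions checked above: async pfault is only set up if
 * userspace enabled the handshake (valid token), the guest PSW matches the
 * configured select/compare mask, external interrupts are enabled both in
 * the PSW and in the relevant CR0 submask, no other interrupt is pending,
 * and pfault handling is enabled for the gmap.
 */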
3076
3077 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3078 {
3079         int rc, cpuflags;
3080
3081         /*
3082          * On s390, notifications for arriving pages will be delivered directly
3083          * to the guest, but the housekeeping for completed pfaults is
3084          * handled outside the worker.
3085          */
3086         kvm_check_async_pf_completion(vcpu);
3087
3088         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3089         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3090
3091         if (need_resched())
3092                 schedule();
3093
3094         if (test_cpu_flag(CIF_MCCK_PENDING))
3095                 s390_handle_mcck();
3096
3097         if (!kvm_is_ucontrol(vcpu->kvm)) {
3098                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3099                 if (rc)
3100                         return rc;
3101         }
3102
3103         rc = kvm_s390_handle_requests(vcpu);
3104         if (rc)
3105                 return rc;
3106
3107         if (guestdbg_enabled(vcpu)) {
3108                 kvm_s390_backup_guest_per_regs(vcpu);
3109                 kvm_s390_patch_guest_per_regs(vcpu);
3110         }
3111
3112         vcpu->arch.sie_block->icptcode = 0;
3113         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3114         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3115         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3116
3117         return 0;
3118 }
3119
3120 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3121 {
3122         struct kvm_s390_pgm_info pgm_info = {
3123                 .code = PGM_ADDRESSING,
3124         };
3125         u8 opcode, ilen;
3126         int rc;
3127
3128         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3129         trace_kvm_s390_sie_fault(vcpu);
3130
3131         /*
3132          * We want to inject an addressing exception, which is defined as a
3133          * suppressing or terminating exception. However, since we came here
3134          * via a DAT access exception, the PSW still points to the faulting
3135          * instruction, as DAT exceptions are nullifying. So we've got
3136          * to look up the current opcode to get the length of the instruction
3137          * to be able to forward the PSW.
3138          */
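	/*
	 * On s390 the instruction length is encoded in the two leftmost bits
	 * of the first opcode byte: 00 -> 2 bytes, 01 or 10 -> 4 bytes and
	 * 11 -> 6 bytes. insn_length() below decodes exactly that.
	 */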
3139         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3140         ilen = insn_length(opcode);
3141         if (rc < 0) {
3142                 return rc;
3143         } else if (rc) {
3144                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3145                  * Forward by arbitrary ilc, injection will take care of
3146                  * nullification if necessary.
3147                  */
3148                 pgm_info = vcpu->arch.pgm;
3149                 ilen = 4;
3150         }
3151         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3152         kvm_s390_forward_psw(vcpu, ilen);
3153         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3154 }
3155
3156 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3157 {
3158         struct mcck_volatile_info *mcck_info;
3159         struct sie_page *sie_page;
3160
3161         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3162                    vcpu->arch.sie_block->icptcode);
3163         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3164
3165         if (guestdbg_enabled(vcpu))
3166                 kvm_s390_restore_guest_per_regs(vcpu);
3167
3168         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3169         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3170
3171         if (exit_reason == -EINTR) {
3172                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3173                 sie_page = container_of(vcpu->arch.sie_block,
3174                                         struct sie_page, sie_block);
3175                 mcck_info = &sie_page->mcck_info;
3176                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3177                 return 0;
3178         }
3179
3180         if (vcpu->arch.sie_block->icptcode > 0) {
3181                 int rc = kvm_handle_sie_intercept(vcpu);
3182
3183                 if (rc != -EOPNOTSUPP)
3184                         return rc;
3185                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3186                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3187                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3188                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3189                 return -EREMOTE;
3190         } else if (exit_reason != -EFAULT) {
3191                 vcpu->stat.exit_null++;
3192                 return 0;
3193         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3194                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3195                 vcpu->run->s390_ucontrol.trans_exc_code =
3196                                                 current->thread.gmap_addr;
3197                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3198                 return -EREMOTE;
3199         } else if (current->thread.gmap_pfault) {
3200                 trace_kvm_s390_major_guest_pfault(vcpu);
3201                 current->thread.gmap_pfault = 0;
3202                 if (kvm_arch_setup_async_pf(vcpu))
3203                         return 0;
3204                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3205         }
3206         return vcpu_post_run_fault_in_sie(vcpu);
3207 }
3208
3209 static int __vcpu_run(struct kvm_vcpu *vcpu)
3210 {
3211         int rc, exit_reason;
3212
3213         /*
3214          * We try to hold kvm->srcu during most of vcpu_run (except when
3215          * running the guest), so that memslots (and other stuff) are protected
3216          */
3217         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3218
3219         do {
3220                 rc = vcpu_pre_run(vcpu);
3221                 if (rc)
3222                         break;
3223
3224                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3225                 /*
3226                  * As PF_VCPU will be used in the fault handler, there must be
3227                  * no uaccess between guest_enter and guest_exit.
3228                  */
3229                 local_irq_disable();
3230                 guest_enter_irqoff();
3231                 __disable_cpu_timer_accounting(vcpu);
3232                 local_irq_enable();
3233                 exit_reason = sie64a(vcpu->arch.sie_block,
3234                                      vcpu->run->s.regs.gprs);
3235                 local_irq_disable();
3236                 __enable_cpu_timer_accounting(vcpu);
3237                 guest_exit_irqoff();
3238                 local_irq_enable();
3239                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3240
3241                 rc = vcpu_post_run(vcpu, exit_reason);
3242         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3243
3244         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3245         return rc;
3246 }
3247
3248 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3249 {
3250         struct runtime_instr_cb *riccb;
3251         struct gs_cb *gscb;
3252
3253         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3254         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3255         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3256         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3257         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3258                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3259         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3260                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3261                 /* some control register changes require a tlb flush */
3262                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3263         }
3264         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3265                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3266                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3267                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3268                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3269                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3270         }
3271         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3272                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3273                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3274                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3275                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3276                         kvm_clear_async_pf_completion_queue(vcpu);
3277         }
3278         /*
3279          * If userspace sets the riccb (e.g. after migration) to a valid state,
3280          * we should enable RI here instead of doing the lazy enablement.
3281          */
3282         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3283             test_kvm_facility(vcpu->kvm, 64) &&
3284             riccb->v &&
3285             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3286                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3287                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3288         }
3289         /*
3290          * If userspace sets the gscb (e.g. after migration) to non-zero,
3291          * we should enable GS here instead of doing the lazy enablement.
3292          */
3293         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3294             test_kvm_facility(vcpu->kvm, 133) &&
3295             gscb->gssm &&
3296             !vcpu->arch.gs_enabled) {
3297                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3298                 vcpu->arch.sie_block->ecb |= ECB_GS;
3299                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3300                 vcpu->arch.gs_enabled = 1;
3301         }
3302         save_access_regs(vcpu->arch.host_acrs);
3303         restore_access_regs(vcpu->run->s.regs.acrs);
3304         /* save host (userspace) fprs/vrs */
3305         save_fpu_regs();
3306         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3307         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3308         if (MACHINE_HAS_VX)
3309                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3310         else
3311                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3312         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3313         if (test_fp_ctl(current->thread.fpu.fpc))
3314                 /* User space provided an invalid FPC, let's clear it */
3315                 current->thread.fpu.fpc = 0;
3316         if (MACHINE_HAS_GS) {
3317                 preempt_disable();
3318                 __ctl_set_bit(2, 4);
3319                 if (current->thread.gs_cb) {
3320                         vcpu->arch.host_gscb = current->thread.gs_cb;
3321                         save_gs_cb(vcpu->arch.host_gscb);
3322                 }
3323                 if (vcpu->arch.gs_enabled) {
3324                         current->thread.gs_cb = (struct gs_cb *)
3325                                                 &vcpu->run->s.regs.gscb;
3326                         restore_gs_cb(current->thread.gs_cb);
3327                 }
3328                 preempt_enable();
3329         }
3330
3331         kvm_run->kvm_dirty_regs = 0;
3332 }
3333
3334 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3335 {
3336         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3337         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3338         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3339         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3340         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3341         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3342         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3343         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3344         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3345         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3346         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3347         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3348         save_access_regs(vcpu->run->s.regs.acrs);
3349         restore_access_regs(vcpu->arch.host_acrs);
3350         /* Save guest register state */
3351         save_fpu_regs();
3352         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3353         /* Restore will be done lazily at return */
3354         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3355         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3356         if (MACHINE_HAS_GS) {
3357                 __ctl_set_bit(2, 4);
3358                 if (vcpu->arch.gs_enabled)
3359                         save_gs_cb(current->thread.gs_cb);
3360                 preempt_disable();
3361                 current->thread.gs_cb = vcpu->arch.host_gscb;
3362                 restore_gs_cb(vcpu->arch.host_gscb);
3363                 preempt_enable();
3364                 if (!vcpu->arch.host_gscb)
3365                         __ctl_clear_bit(2, 4);
3366                 vcpu->arch.host_gscb = NULL;
3367         }
3368
3369 }
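
/*
 * sync_regs() and store_regs() bracket every KVM_RUN invocation: on the
 * way in, register state that userspace marked dirty is copied into the
 * SIE block and the guest FP/vector and guarded-storage context is swapped
 * in; on the way out, store_regs() writes the current state back to
 * kvm_run and restores the host context.
 */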
3370
3371 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3372 {
3373         int rc;
3374         sigset_t sigsaved;
3375
3376         if (kvm_run->immediate_exit)
3377                 return -EINTR;
3378
3379         if (guestdbg_exit_pending(vcpu)) {
3380                 kvm_s390_prepare_debug_exit(vcpu);
3381                 return 0;
3382         }
3383
3384         if (vcpu->sigset_active)
3385                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3386
3387         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3388                 kvm_s390_vcpu_start(vcpu);
3389         } else if (is_vcpu_stopped(vcpu)) {
3390                 pr_err_ratelimited("can't run stopped vcpu %d\n",
3391                                    vcpu->vcpu_id);
3392                 return -EINVAL;
3393         }
3394
3395         sync_regs(vcpu, kvm_run);
3396         enable_cpu_timer_accounting(vcpu);
3397
3398         might_fault();
3399         rc = __vcpu_run(vcpu);
3400
3401         if (signal_pending(current) && !rc) {
3402                 kvm_run->exit_reason = KVM_EXIT_INTR;
3403                 rc = -EINTR;
3404         }
3405
3406         if (guestdbg_exit_pending(vcpu) && !rc)  {
3407                 kvm_s390_prepare_debug_exit(vcpu);
3408                 rc = 0;
3409         }
3410
3411         if (rc == -EREMOTE) {
3412                 /* userspace support is needed, kvm_run has been prepared */
3413                 rc = 0;
3414         }
3415
3416         disable_cpu_timer_accounting(vcpu);
3417         store_regs(vcpu, kvm_run);
3418
3419         if (vcpu->sigset_active)
3420                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
3421
3422         vcpu->stat.exit_userspace++;
3423         return rc;
3424 }
3425
3426 /*
3427  * store status at address
3428  * we have two special cases:
3429  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3430  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3431  */
3432 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3433 {
3434         unsigned char archmode = 1;
3435         freg_t fprs[NUM_FPRS];
3436         unsigned int px;
3437         u64 clkcomp, cputm;
3438         int rc;
3439
3440         px = kvm_s390_get_prefix(vcpu);
3441         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3442                 if (write_guest_abs(vcpu, 163, &archmode, 1))
3443                         return -EFAULT;
3444                 gpa = 0;
3445         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3446                 if (write_guest_real(vcpu, 163, &archmode, 1))
3447                         return -EFAULT;
3448                 gpa = px;
3449         } else
3450                 gpa -= __LC_FPREGS_SAVE_AREA;
3451
3452         /* manually convert vector registers if necessary */
3453         if (MACHINE_HAS_VX) {
3454                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3455                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3456                                      fprs, 128);
3457         } else {
3458                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3459                                      vcpu->run->s.regs.fprs, 128);
3460         }
3461         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3462                               vcpu->run->s.regs.gprs, 128);
3463         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3464                               &vcpu->arch.sie_block->gpsw, 16);
3465         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3466                               &px, 4);
3467         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3468                               &vcpu->run->s.regs.fpc, 4);
3469         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3470                               &vcpu->arch.sie_block->todpr, 4);
3471         cputm = kvm_s390_get_cpu_timer(vcpu);
3472         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3473                               &cputm, 8);
3474         clkcomp = vcpu->arch.sie_block->ckc >> 8;
3475         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3476                               &clkcomp, 8);
3477         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3478                               &vcpu->run->s.regs.acrs, 64);
3479         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3480                               &vcpu->arch.sie_block->gcr, 128);
3481         return rc ? -EFAULT : 0;
3482 }
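
/*
 * Note on the clock comparator above: only its upper 56 bits (bits 0-55 in
 * IBM bit numbering) are kept in the architected save area, which is why
 * the value is shifted right by eight bits before being stored.
 */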
3483
3484 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3485 {
3486         /*
3487          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3488          * switch in the run ioctl. Let's update our copies before we save
3489          * them into the save area.
3490          */
3491         save_fpu_regs();
3492         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3493         save_access_regs(vcpu->run->s.regs.acrs);
3494
3495         return kvm_s390_store_status_unloaded(vcpu, addr);
3496 }
3497
3498 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3499 {
3500         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3501         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3502 }
3503
3504 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3505 {
3506         unsigned int i;
3507         struct kvm_vcpu *vcpu;
3508
3509         kvm_for_each_vcpu(i, vcpu, kvm) {
3510                 __disable_ibs_on_vcpu(vcpu);
3511         }
3512 }
3513
3514 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3515 {
3516         if (!sclp.has_ibs)
3517                 return;
3518         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3519         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3520 }
3521
3522 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3523 {
3524         int i, online_vcpus, started_vcpus = 0;
3525
3526         if (!is_vcpu_stopped(vcpu))
3527                 return;
3528
3529         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3530         /* Only one cpu at a time may enter/leave the STOPPED state. */
3531         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3532         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3533
3534         for (i = 0; i < online_vcpus; i++) {
3535                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3536                         started_vcpus++;
3537         }
3538
3539         if (started_vcpus == 0) {
3540                 /* we're the only active VCPU -> speed it up */
3541                 __enable_ibs_on_vcpu(vcpu);
3542         } else if (started_vcpus == 1) {
3543                 /*
3544                  * As we are starting a second VCPU, we have to disable
3545                  * the IBS facility on all VCPUs to remove potentially
3546                  * outstanding ENABLE requests.
3547                  */
3548                 __disable_ibs_on_all_vcpus(vcpu->kvm);
3549         }
3550
3551         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3552         /*
3553          * Another VCPU might have used IBS while we were offline.
3554          * Let's play safe and flush the VCPU at startup.
3555          */
3556         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3557         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3558         return;
3559 }
3560
3561 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3562 {
3563         int i, online_vcpus, started_vcpus = 0;
3564         struct kvm_vcpu *started_vcpu = NULL;
3565
3566         if (is_vcpu_stopped(vcpu))
3567                 return;
3568
3569         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3570         /* Only one cpu at a time may enter/leave the STOPPED state. */
3571         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3572         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3573
3574         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3575         kvm_s390_clear_stop_irq(vcpu);
3576
3577         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3578         __disable_ibs_on_vcpu(vcpu);
3579
3580         for (i = 0; i < online_vcpus; i++) {
3581                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3582                         started_vcpus++;
3583                         started_vcpu = vcpu->kvm->vcpus[i];
3584                 }
3585         }
3586
3587         if (started_vcpus == 1) {
3588                 /*
3589                  * As we only have one VCPU left, we want to enable the
3590                  * IBS facility for that VCPU to speed it up.
3591                  */
3592                 __enable_ibs_on_vcpu(started_vcpu);
3593         }
3594
3595         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3596         return;
3597 }
3598
3599 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3600                                      struct kvm_enable_cap *cap)
3601 {
3602         int r;
3603
3604         if (cap->flags)
3605                 return -EINVAL;
3606
3607         switch (cap->cap) {
3608         case KVM_CAP_S390_CSS_SUPPORT:
3609                 if (!vcpu->kvm->arch.css_support) {
3610                         vcpu->kvm->arch.css_support = 1;
3611                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3612                         trace_kvm_s390_enable_css(vcpu->kvm);
3613                 }
3614                 r = 0;
3615                 break;
3616         default:
3617                 r = -EINVAL;
3618                 break;
3619         }
3620         return r;
3621 }
3622
3623 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3624                                   struct kvm_s390_mem_op *mop)
3625 {
3626         void __user *uaddr = (void __user *)mop->buf;
3627         void *tmpbuf = NULL;
3628         int r, srcu_idx;
3629         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3630                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3631
3632         if (mop->flags & ~supported_flags)
3633                 return -EINVAL;
3634
3635         if (mop->size > MEM_OP_MAX_SIZE)
3636                 return -E2BIG;
3637
3638         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3639                 tmpbuf = vmalloc(mop->size);
3640                 if (!tmpbuf)
3641                         return -ENOMEM;
3642         }
3643
3644         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3645
3646         switch (mop->op) {
3647         case KVM_S390_MEMOP_LOGICAL_READ:
3648                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3649                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3650                                             mop->size, GACC_FETCH);
3651                         break;
3652                 }
3653                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3654                 if (r == 0) {
3655                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3656                                 r = -EFAULT;
3657                 }
3658                 break;
3659         case KVM_S390_MEMOP_LOGICAL_WRITE:
3660                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3661                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3662                                             mop->size, GACC_STORE);
3663                         break;
3664                 }
3665                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3666                         r = -EFAULT;
3667                         break;
3668                 }
3669                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3670                 break;
3671         default:
3672                 r = -EINVAL;
3673         }
3674
3675         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3676
3677         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3678                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3679
3680         vfree(tmpbuf);
3681         return r;
3682 }
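
/*
 * Illustrative userspace invocation of the memop interface above; a sketch
 * only, assuming an open vcpu file descriptor "vcpu_fd":
 *
 *	char data[512];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr	= 0x1000,
 *		.size	= sizeof(data),
 *		.op	= KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf	= (__u64)(unsigned long)data,
 *		.ar	= 0,
 *	};
 *	int ret = ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *
 * With KVM_S390_MEMOP_F_CHECK_ONLY in op.flags only the access check is
 * performed and no data is copied, as implemented above.
 */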
3683
3684 long kvm_arch_vcpu_ioctl(struct file *filp,
3685                          unsigned int ioctl, unsigned long arg)
3686 {
3687         struct kvm_vcpu *vcpu = filp->private_data;
3688         void __user *argp = (void __user *)arg;
3689         int idx;
3690         long r;
3691
3692         switch (ioctl) {
3693         case KVM_S390_IRQ: {
3694                 struct kvm_s390_irq s390irq;
3695
3696                 r = -EFAULT;
3697                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3698                         break;
3699                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3700                 break;
3701         }
3702         case KVM_S390_INTERRUPT: {
3703                 struct kvm_s390_interrupt s390int;
3704                 struct kvm_s390_irq s390irq;
3705
3706                 r = -EFAULT;
3707                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3708                         break;
3709                 if (s390int_to_s390irq(&s390int, &s390irq))
3710                         return -EINVAL;
3711                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3712                 break;
3713         }
3714         case KVM_S390_STORE_STATUS:
3715                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3716                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3717                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3718                 break;
3719         case KVM_S390_SET_INITIAL_PSW: {
3720                 psw_t psw;
3721
3722                 r = -EFAULT;
3723                 if (copy_from_user(&psw, argp, sizeof(psw)))
3724                         break;
3725                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3726                 break;
3727         }
3728         case KVM_S390_INITIAL_RESET:
3729                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3730                 break;
3731         case KVM_SET_ONE_REG:
3732         case KVM_GET_ONE_REG: {
3733                 struct kvm_one_reg reg;
3734                 r = -EFAULT;
3735                 if (copy_from_user(&reg, argp, sizeof(reg)))
3736                         break;
3737                 if (ioctl == KVM_SET_ONE_REG)
3738                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3739                 else
3740                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3741                 break;
3742         }
3743 #ifdef CONFIG_KVM_S390_UCONTROL
3744         case KVM_S390_UCAS_MAP: {
3745                 struct kvm_s390_ucas_mapping ucasmap;
3746
3747                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3748                         r = -EFAULT;
3749                         break;
3750                 }
3751
3752                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3753                         r = -EINVAL;
3754                         break;
3755                 }
3756
3757                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3758                                      ucasmap.vcpu_addr, ucasmap.length);
3759                 break;
3760         }
3761         case KVM_S390_UCAS_UNMAP: {
3762                 struct kvm_s390_ucas_mapping ucasmap;
3763
3764                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3765                         r = -EFAULT;
3766                         break;
3767                 }
3768
3769                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3770                         r = -EINVAL;
3771                         break;
3772                 }
3773
3774                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3775                         ucasmap.length);
3776                 break;
3777         }
3778 #endif
3779         case KVM_S390_VCPU_FAULT: {
3780                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3781                 break;
3782         }
3783         case KVM_ENABLE_CAP:
3784         {
3785                 struct kvm_enable_cap cap;
3786                 r = -EFAULT;
3787                 if (copy_from_user(&cap, argp, sizeof(cap)))
3788                         break;
3789                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3790                 break;
3791         }
3792         case KVM_S390_MEM_OP: {
3793                 struct kvm_s390_mem_op mem_op;
3794
3795                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3796                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3797                 else
3798                         r = -EFAULT;
3799                 break;
3800         }
3801         case KVM_S390_SET_IRQ_STATE: {
3802                 struct kvm_s390_irq_state irq_state;
3803
3804                 r = -EFAULT;
3805                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3806                         break;
3807                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3808                     irq_state.len == 0 ||
3809                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3810                         r = -EINVAL;
3811                         break;
3812                 }
3813                 r = kvm_s390_set_irq_state(vcpu,
3814                                            (void __user *) irq_state.buf,
3815                                            irq_state.len);
3816                 break;
3817         }
3818         case KVM_S390_GET_IRQ_STATE: {
3819                 struct kvm_s390_irq_state irq_state;
3820
3821                 r = -EFAULT;
3822                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3823                         break;
3824                 if (irq_state.len == 0) {
3825                         r = -EINVAL;
3826                         break;
3827                 }
3828                 r = kvm_s390_get_irq_state(vcpu,
3829                                            (__u8 __user *)  irq_state.buf,
3830                                            irq_state.len);
3831                 break;
3832         }
3833         default:
3834                 r = -ENOTTY;
3835         }
3836         return r;
3837 }
3838
3839 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3840 {
3841 #ifdef CONFIG_KVM_S390_UCONTROL
3842         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3843                  && (kvm_is_ucontrol(vcpu->kvm))) {
3844                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3845                 get_page(vmf->page);
3846                 return 0;
3847         }
3848 #endif
3849         return VM_FAULT_SIGBUS;
3850 }
3851
3852 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3853                             unsigned long npages)
3854 {
3855         return 0;
3856 }
3857
3858 /* Section: memory related */
3859 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3860                                    struct kvm_memory_slot *memslot,
3861                                    const struct kvm_userspace_memory_region *mem,
3862                                    enum kvm_mr_change change)
3863 {
3864         /* A few sanity checks. Memory slots have to start and end on a
3865            segment boundary (1 MB); the 0xffffful checks below enforce this.
3866            The memory in userland may be fragmented into various vmas, and it
3867            is okay to mmap() and munmap() stuff in this slot at any time. */
3868
3869         if (mem->userspace_addr & 0xffffful)
3870                 return -EINVAL;
3871
3872         if (mem->memory_size & 0xffffful)
3873                 return -EINVAL;
3874
3875         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3876                 return -EINVAL;
3877
3878         return 0;
3879 }
3880
3881 void kvm_arch_commit_memory_region(struct kvm *kvm,
3882                                 const struct kvm_userspace_memory_region *mem,
3883                                 const struct kvm_memory_slot *old,
3884                                 const struct kvm_memory_slot *new,
3885                                 enum kvm_mr_change change)
3886 {
3887         int rc;
3888
3889         /* If the basics of the memslot do not change, we do not want
3890          * to update the gmap. Every update causes several unnecessary
3891          * segment translation exceptions. This is usually handled just
3892          * fine by the normal fault handler + gmap, but it will also
3893          * cause faults on the prefix page of running guest CPUs.
3894          */
3895         if (old->userspace_addr == mem->userspace_addr &&
3896             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3897             old->npages * PAGE_SIZE == mem->memory_size)
3898                 return;
3899
3900         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3901                 mem->guest_phys_addr, mem->memory_size);
3902         if (rc)
3903                 pr_warn("failed to commit memory region\n");
3904         return;
3905 }
3906
3907 static inline unsigned long nonhyp_mask(int i)
3908 {
3909         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3910
3911         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3912 }
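
/*
 * nonhyp_mask() extracts one of the sixteen 2-bit fields of sclp.hmfai and
 * turns it into a facility mask: field value 0 keeps the full 48-bit mask
 * 0x0000ffffffffffffUL, and each increment shifts the mask right by another
 * 16 bits, so field value 3 masks all facilities off.
 */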
3913
3914 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3915 {
3916         vcpu->valid_wakeup = false;
3917 }
3918
3919 static int __init kvm_s390_init(void)
3920 {
3921         int i;
3922
3923         if (!sclp.has_sief2) {
3924                 pr_info("SIE not available\n");
3925                 return -ENODEV;
3926         }
3927
3928         for (i = 0; i < 16; i++)
3929                 kvm_s390_fac_list_mask[i] |=
3930                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3931
3932         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3933 }
3934
3935 static void __exit kvm_s390_exit(void)
3936 {
3937         kvm_exit();
3938 }
3939
3940 module_init(kvm_s390_init);
3941 module_exit(kvm_s390_exit);
3942
3943 /*
3944  * Enable autoloading of the kvm module.
3945  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3946  * since x86 takes a different approach.
3947  */
3948 #include <linux/miscdevice.h>
3949 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3950 MODULE_ALIAS("devname:kvm");