// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2017
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
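/*
 * Size of the irq-state buffer (used by KVM_S390_{GET,SET}_IRQ_STATE):
 * presumably one emergency signal can be pending per possible source
 * CPU, plus up to LOCAL_IRQS other local interrupts per VCPU.
 */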
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

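/*
 * VM/VCPU statistics, exported via debugfs (typically under
 * /sys/kernel/debug/kvm/): one counter per entry below.
 */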
struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_pei", VCPU_STAT(exit_pei) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
        { "instruction_stsch", VCPU_STAT(instruction_stsch) },
        { "instruction_chsc", VCPU_STAT(instruction_chsc) },
        { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
        { "instruction_sie", VCPU_STAT(instruction_sie) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
        { "diagnose_10", VCPU_STAT(diagnose_10) },
        { "diagnose_44", VCPU_STAT(diagnose_44) },
        { "diagnose_9c", VCPU_STAT(diagnose_9c) },
        { "diagnose_258", VCPU_STAT(diagnose_258) },
        { "diagnose_308", VCPU_STAT(diagnose_308) },
        { "diagnose_500", VCPU_STAT(diagnose_500) },
        { NULL }
};

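/*
 * Layout of the 128-bit extended TOD clock as filled in by
 * get_tod_clock_ext()/STORE CLOCK EXTENDED: an 8-bit epoch index
 * followed by the 64-bit TOD value, padded to 16 bytes.
 */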
struct kvm_s390_tod_clock_ext {
        __u8 epoch_idx;
        __u64 tod;
        __u8 reserved[7];
} __packed;

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
        BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
        return ARRAY_SIZE(kvm_s390_fac_list_mask);
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
        /* every s390 is virtualization enabled ;-) */
        return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
                          void *v)
{
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        int i;
        unsigned long long *delta = v;

        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm->arch.epoch -= *delta;
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        vcpu->arch.sie_block->epoch -= *delta;
                        if (vcpu->arch.cputm_enabled)
                                vcpu->arch.cputm_start += *delta;
                        if (vcpu->arch.vsie_block)
                                vcpu->arch.vsie_block->epoch -= *delta;
                }
        }
        return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
        gmap_notifier.notifier_call = kvm_gmap_notifier;
        gmap_register_pte_notifier(&gmap_notifier);
        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
        gmap_register_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
                                       &kvm_clock_notifier);
        return 0;
}

void kvm_arch_hardware_unsetup(void)
{
        gmap_unregister_pte_notifier(&gmap_notifier);
        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
                                         &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
        set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

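/*
 * Test whether PERFORM LOCKED OPERATION function code @nr is available:
 * setting bit 0x100 in GR0 selects the "test bit" mode, where PLO only
 * sets the condition code (cc 0 == function code available).
 */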
static inline int plo_test_bit(unsigned char nr)
{
        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
        int cc;

        asm volatile(
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : "d" (r0)
                : "cc");
        return cc == 0;
}

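/*
 * Probe which optional CPU features and PLO/PTFF/CPACF subfunctions the
 * host provides, so they can later be offered to guests via the CPU
 * model attributes. The features required for nested virtualization
 * (vSIE) are only advertised if the "nested" module parameter is set.
 */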
static void kvm_s390_cpu_feat_init(void)
{
        int i;

        for (i = 0; i < 256; ++i) {
                if (plo_test_bit(i))
                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
        }

        if (test_facility(28)) /* TOD-clock steering */
                ptff(kvm_s390_available_subfunc.ptff,
                     sizeof(kvm_s390_available_subfunc.ptff),
                     PTFF_QAF);

        if (test_facility(17)) { /* MSA */
                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmac);
                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmc);
                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.km);
                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kimd);
                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.klmd);
        }
        if (test_facility(76)) /* MSA3 */
                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pckmo);
        if (test_facility(77)) { /* MSA4 */
                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmctr);
                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmf);
                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmo);
                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pcc);
        }
        if (test_facility(57)) /* MSA5 */
                __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.ppno);

        if (test_facility(146)) /* MSA8 */
                __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kma);

        if (MACHINE_HAS_ESOP)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
        /*
         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
         */
        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
            !test_facility(3) || !nested)
                return;
        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
        if (sclp.has_64bscao)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
        if (sclp.has_siif)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
        if (sclp.has_gpere)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
        if (sclp.has_gsls)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
        if (sclp.has_ib)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
        if (sclp.has_cei)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
        if (sclp.has_ibs)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
        if (sclp.has_kss)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
        /*
         * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
         * all skey handling functions read/set the skey from the PGSTE
         * instead of the real storage key.
         *
         * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
         * pages be detected as preserved although they are resident.
         *
         * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
         *
         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
         *
         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
         * cannot easily shadow the SCA because of the ipte lock.
         */
}

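/*
 * Module initialization: set up the s390 debug feature used for the
 * VM/VCPU trace events and register the floating interrupt controller
 * (FLIC) device type.
 */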
int kvm_arch_init(void *opaque)
{
        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf)
                return -ENOMEM;

        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
                debug_unregister(kvm_s390_dbf);
                return -ENOMEM;
        }

        kvm_s390_cpu_feat_init();

        /* Register floating interrupt controller interface. */
        return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
        debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
{
        if (ioctl == KVM_S390_ENABLE_SIE)
                return s390_enable_sie();
        return -EINVAL;
}

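/*
 * Report which capabilities are supported. Most of them are
 * unconditionally available on s390; others depend on machine
 * facilities (e.g. vector registers) or, for the VCPU limits, on the
 * SCA format in use.
 */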
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
        int r;

        switch (ext) {
        case KVM_CAP_S390_PSW:
        case KVM_CAP_S390_GMAP:
        case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_CAP_S390_UCONTROL:
#endif
        case KVM_CAP_ASYNC_PF:
        case KVM_CAP_SYNC_REGS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_ENABLE_CAP_VM:
        case KVM_CAP_S390_IRQCHIP:
        case KVM_CAP_VM_ATTRIBUTES:
        case KVM_CAP_MP_STATE:
        case KVM_CAP_IMMEDIATE_EXIT:
        case KVM_CAP_S390_INJECT_IRQ:
        case KVM_CAP_S390_USER_SIGP:
        case KVM_CAP_S390_USER_STSI:
        case KVM_CAP_S390_SKEYS:
        case KVM_CAP_S390_IRQ_STATE:
        case KVM_CAP_S390_USER_INSTR0:
        case KVM_CAP_S390_CMMA_MIGRATION:
        case KVM_CAP_S390_AIS:
        case KVM_CAP_S390_AIS_MIGRATION:
                r = 1;
                break;
        case KVM_CAP_S390_MEM_OP:
                r = MEM_OP_MAX_SIZE;
                break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
                r = KVM_S390_BSCA_CPU_SLOTS;
                if (!kvm_s390_use_sca_entries())
                        r = KVM_MAX_VCPUS;
                else if (sclp.has_esca && sclp.has_64bscao)
                        r = KVM_S390_ESCA_CPU_SLOTS;
                break;
        case KVM_CAP_NR_MEMSLOTS:
                r = KVM_USER_MEM_SLOTS;
                break;
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                r = MACHINE_HAS_VX;
                break;
        case KVM_CAP_S390_RI:
                r = test_facility(64);
                break;
        case KVM_CAP_S390_GS:
                r = test_facility(133);
                break;
        case KVM_CAP_S390_BPB:
                r = test_facility(82);
                break;
        default:
                r = 0;
        }
        return r;
}

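/*
 * Transfer the dirty state from the gmap (guest mapping) into the
 * memslot's dirty bitmap, one page at a time, rescheduling between
 * pages and bailing out on a fatal signal.
 */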
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
                                        struct kvm_memory_slot *memslot)
{
        gfn_t cur_gfn, last_gfn;
        unsigned long address;
        struct gmap *gmap = kvm->arch.gmap;

        /* Loop over all guest pages */
        last_gfn = memslot->base_gfn + memslot->npages;
        for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
                address = gfn_to_hva_memslot(memslot, cur_gfn);

                if (test_and_clear_guest_dirty(gmap->mm, address))
                        mark_page_dirty(kvm, cur_gfn);
                if (fatal_signal_pending(current))
                        return;
                cond_resched();
        }
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
{
        int r;
        unsigned long n;
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
        int is_dirty = 0;

        if (kvm_is_ucontrol(kvm))
                return -EINVAL;

        mutex_lock(&kvm->slots_lock);

        r = -EINVAL;
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;

        slots = kvm_memslots(kvm);
        memslot = id_to_memslot(slots, log->slot);
        r = -ENOENT;
        if (!memslot->dirty_bitmap)
                goto out;

        kvm_s390_sync_dirty_log(kvm, memslot);
        r = kvm_get_dirty_log(kvm, log, &is_dirty);
        if (r)
                goto out;

        /* Clear the dirty log */
        if (is_dirty) {
                n = kvm_dirty_bitmap_bytes(memslot);
                memset(memslot->dirty_bitmap, 0, n);
        }
        r = 0;
out:
        mutex_unlock(&kvm->slots_lock);
        return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
        unsigned int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
        }
}

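/*
 * Handle KVM_ENABLE_CAP for VM-wide capabilities. Capabilities that
 * extend the guest CPU model (vector registers, runtime
 * instrumentation, guarded storage, ...) can only be enabled as long
 * as no VCPU has been created yet.
 */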
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_IRQCHIP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
                kvm->arch.use_irqchip = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_SIGP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
                kvm->arch.user_sigp = 1;
                r = 0;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (MACHINE_HAS_VX) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
                        set_kvm_facility(kvm->arch.model.fac_list, 129);
                        if (test_facility(134)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 134);
                                set_kvm_facility(kvm->arch.model.fac_list, 134);
                        }
                        if (test_facility(135)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 135);
                                set_kvm_facility(kvm->arch.model.fac_list, 135);
                        }
                        r = 0;
                } else
                        r = -EINVAL;
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_RI:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(64)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
                        set_kvm_facility(kvm->arch.model.fac_list, 64);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_AIS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else {
                        set_kvm_facility(kvm->arch.model.fac_mask, 72);
                        set_kvm_facility(kvm->arch.model.fac_list, 72);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: AIS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_GS:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (atomic_read(&kvm->online_vcpus)) {
                        r = -EBUSY;
                } else if (test_facility(133)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 133);
                        set_kvm_facility(kvm->arch.model.fac_list, 133);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_USER_STSI:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
                kvm->arch.user_stsi = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_INSTR0:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
                kvm->arch.user_instr0 = 1;
                icpt_operexc_on_all_vcpus(kvm);
                r = 0;
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}
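/*
 * Illustrative user space usage (a sketch, not part of this file): the
 * capabilities above are enabled with the KVM_ENABLE_CAP ioctl on the
 * VM file descriptor, e.g.:
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_USER_SIGP,
 *	};
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */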

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_LIMIT_SIZE:
                ret = 0;
                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
                         kvm->arch.mem_limit);
                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
                        ret = -EFAULT;
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;
        unsigned int idx;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_ENABLE_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;

                ret = -EBUSY;
                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
                mutex_lock(&kvm->lock);
                if (!kvm->created_vcpus) {
                        kvm->arch.use_cmma = 1;
                        ret = 0;
                }
                mutex_unlock(&kvm->lock);
                break;
        case KVM_S390_VM_MEM_CLR_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;
                ret = -EINVAL;
                if (!kvm->arch.use_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
                mutex_lock(&kvm->lock);
                idx = srcu_read_lock(&kvm->srcu);
                s390_reset_cmma(kvm->arch.gmap->mm);
                srcu_read_unlock(&kvm->srcu, idx);
                mutex_unlock(&kvm->lock);
                ret = 0;
                break;
        case KVM_S390_VM_MEM_LIMIT_SIZE: {
                unsigned long new_limit;

                if (kvm_is_ucontrol(kvm))
                        return -EINVAL;

                if (get_user(new_limit, (u64 __user *)attr->addr))
                        return -EFAULT;

                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
                    new_limit > kvm->arch.mem_limit)
                        return -E2BIG;

                if (!new_limit)
                        return -EINVAL;

                /* gmap_create takes last usable address */
                if (new_limit != KVM_S390_NO_MEM_LIMIT)
                        new_limit -= 1;

                ret = -EBUSY;
                mutex_lock(&kvm->lock);
                if (!kvm->created_vcpus) {
                        /* gmap_create will round the limit up */
                        struct gmap *new = gmap_create(current->mm, new_limit);

                        if (!new) {
                                ret = -ENOMEM;
                        } else {
                                gmap_remove(kvm->arch.gmap);
                                new->private = kvm;
                                kvm->arch.gmap = new;
                                ret = 0;
                        }
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
                         (void *) kvm->arch.gmap->asce);
                break;
        }
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

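/*
 * Configure key wrapping for the guest: generate new AES/DEA wrapping
 * key masks in the CRYCB or clear them, then kick every VCPU out of
 * SIE so the new crypto setup is picked up.
 */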
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_vcpu *vcpu;
        int i;

        if (!test_kvm_facility(kvm, 76))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        switch (attr->attr) {
        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                get_random_bytes(
                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                kvm->arch.crypto.aes_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                get_random_bytes(
                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                kvm->arch.crypto.dea_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                kvm->arch.crypto.aes_kw = 0;
                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                kvm->arch.crypto.dea_kw = 0;
                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
                break;
        default:
                mutex_unlock(&kvm->lock);
                return -ENXIO;
        }

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_vcpu_crypto_setup(vcpu);
                exit_sie(vcpu);
        }
        mutex_unlock(&kvm->lock);
        return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
        int cx;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(cx, vcpu, kvm)
                kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
        struct kvm_s390_migration_state *mgs;
        struct kvm_memory_slot *ms;
        /* should be the only one */
        struct kvm_memslots *slots;
        unsigned long ram_pages;
        int slotnr;

        /* migration mode already enabled */
        if (kvm->arch.migration_state)
                return 0;

        slots = kvm_memslots(kvm);
        if (!slots || !slots->used_slots)
                return -EINVAL;

        mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
        if (!mgs)
                return -ENOMEM;
        kvm->arch.migration_state = mgs;

        if (kvm->arch.use_cmma) {
                /*
                 * Get the first slot. They are reverse sorted by base_gfn, so
                 * the first slot is also the one at the end of the address
                 * space. We have verified above that at least one slot is
                 * present.
                 */
                ms = slots->memslots;
                /* round up so we only use full longs */
                ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
                /* allocate enough bytes to store all the bits */
                mgs->pgste_bitmap = vmalloc(ram_pages / 8);
                if (!mgs->pgste_bitmap) {
                        kfree(mgs);
                        kvm->arch.migration_state = NULL;
                        return -ENOMEM;
                }

                mgs->bitmap_size = ram_pages;
                atomic64_set(&mgs->dirty_pages, ram_pages);
                /* mark all the pages in active slots as dirty */
                for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
                        ms = slots->memslots + slotnr;
                        bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
                }

                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
        }
        return 0;
}

/*
 * Must be called with kvm->lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
        struct kvm_s390_migration_state *mgs;

        /* migration mode already disabled */
        if (!kvm->arch.migration_state)
                return 0;
        mgs = kvm->arch.migration_state;
        kvm->arch.migration_state = NULL;

        if (kvm->arch.use_cmma) {
                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
                vfree(mgs->pgste_bitmap);
        }
        kfree(mgs);
        return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        int idx, res = -ENXIO;

        mutex_lock(&kvm->lock);
        switch (attr->attr) {
        case KVM_S390_VM_MIGRATION_START:
                idx = srcu_read_lock(&kvm->srcu);
                res = kvm_s390_vm_start_migration(kvm);
                srcu_read_unlock(&kvm->srcu, idx);
                break;
        case KVM_S390_VM_MIGRATION_STOP:
                res = kvm_s390_vm_stop_migration(kvm);
                break;
        default:
                break;
        }
        mutex_unlock(&kvm->lock);

        return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        u64 mig = (kvm->arch.migration_state != NULL);

        if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
                return -ENXIO;

        if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
                return -EFAULT;
        return 0;
}
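/*
 * Illustrative user space usage (a sketch, not part of this file):
 * migration mode is toggled with the KVM_SET_DEVICE_ATTR ioctl on the
 * VM file descriptor, e.g.:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr = KVM_S390_VM_MIGRATION_START,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */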

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;

        if (test_kvm_facility(kvm, 139))
                kvm_s390_set_tod_clock_ext(kvm, &gtod);
        else if (gtod.epoch_idx == 0)
                kvm_s390_set_tod_clock(kvm, gtod.tod);
        else
                return -EINVAL;

        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);

        return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high;

        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
                                           sizeof(gtod_high)))
                return -EFAULT;

        if (gtod_high != 0)
                return -EINVAL;
        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;

        kvm_s390_set_tod_clock(kvm, gtod);
        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
        return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_set_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_set_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_set_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

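/*
 * Compute the guest view of the extended TOD clock: add the guest
 * epoch to the host TOD and the guest epoch index to the host epoch
 * index; if the 64-bit TOD addition wrapped around, carry one into
 * the epoch index.
 */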
static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
                                        struct kvm_s390_vm_tod_clock *gtod)
{
        struct kvm_s390_tod_clock_ext htod;

        preempt_disable();

        get_tod_clock_ext((char *)&htod);

        gtod->tod = htod.tod + kvm->arch.epoch;
        gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;

        if (gtod->tod < htod.tod)
                gtod->epoch_idx += 1;

        preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        memset(&gtod, 0, sizeof(gtod));

        if (test_kvm_facility(kvm, 139))
                kvm_s390_get_tod_clock_ext(kvm, &gtod);
        else
                gtod.tod = kvm_s390_get_tod_clock_fast(kvm);

        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;

        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);
        return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high = 0;

        if (copy_to_user((void __user *)attr->addr, &gtod_high,
                                         sizeof(gtod_high)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        gtod = kvm_s390_get_tod_clock_fast(kvm);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

        return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_get_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_get_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_get_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        u16 lowest_ibc, unblocked_ibc;
        int ret = 0;

        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus) {
                ret = -EBUSY;
                goto out;
        }
        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        if (!copy_from_user(proc, (void __user *)attr->addr,
                            sizeof(*proc))) {
                kvm->arch.model.cpuid = proc->cpuid;
                lowest_ibc = sclp.ibc >> 16 & 0xfff;
                unblocked_ibc = sclp.ibc & 0xfff;
                if (lowest_ibc && proc->ibc) {
                        if (proc->ibc > unblocked_ibc)
                                kvm->arch.model.ibc = unblocked_ibc;
                        else if (proc->ibc < lowest_ibc)
                                kvm->arch.model.ibc = lowest_ibc;
                        else
                                kvm->arch.model.ibc = proc->ibc;
                }
                memcpy(kvm->arch.model.fac_list, proc->fac_list,
                       S390_ARCH_FAC_LIST_SIZE_BYTE);
                VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
                         kvm->arch.model.ibc,
                         kvm->arch.model.cpuid);
                VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
                         kvm->arch.model.fac_list[0],
                         kvm->arch.model.fac_list[1],
                         kvm->arch.model.fac_list[2]);
        } else
                ret = -EFAULT;
        kfree(proc);
out:
        mutex_unlock(&kvm->lock);
        return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
                                       struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;
        int ret = -EBUSY;

        if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
                return -EFAULT;
        if (!bitmap_subset((unsigned long *) data.feat,
                           kvm_s390_available_cpu_feat,
                           KVM_S390_VM_CPU_FEAT_NR_BITS))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        if (!atomic_read(&kvm->online_vcpus)) {
                bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
                            KVM_S390_VM_CPU_FEAT_NR_BITS);
                ret = 0;
        }
        mutex_unlock(&kvm->lock);
        return ret;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        /*
         * Once supported by kernel + hw, we have to store the subfunctions
         * in kvm->arch and remember that user space configured them.
         */
        return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret = -ENXIO;

        switch (attr->attr) {
        case KVM_S390_VM_CPU_PROCESSOR:
                ret = kvm_s390_set_processor(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                ret = kvm_s390_set_processor_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                ret = kvm_s390_set_processor_subfunc(kvm, attr);
                break;
        }
        return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        int ret = 0;

        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        proc->cpuid = kvm->arch.model.cpuid;
        proc->ibc = kvm->arch.model.ibc;
        memcpy(&proc->fac_list, kvm->arch.model.fac_list,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
                 kvm->arch.model.ibc,
                 kvm->arch.model.cpuid);
        VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
                 kvm->arch.model.fac_list[0],
                 kvm->arch.model.fac_list[1],
                 kvm->arch.model.fac_list[2]);
        if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
                ret = -EFAULT;
        kfree(proc);
out:
        return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_machine *mach;
        int ret = 0;

        mach = kzalloc(sizeof(*mach), GFP_KERNEL);
        if (!mach) {
                ret = -ENOMEM;
                goto out;
        }
        get_cpu_id((struct cpuid *) &mach->cpuid);
        mach->ibc = sclp.ibc;
        memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
               sizeof(S390_lowcore.stfle_fac_list));
        VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
                 kvm->arch.model.ibc,
                 kvm->arch.model.cpuid);
        VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
                 mach->fac_mask[0],
                 mach->fac_mask[1],
                 mach->fac_mask[2]);
        VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
                 mach->fac_list[0],
                 mach->fac_list[1],
                 mach->fac_list[2]);
        if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
                ret = -EFAULT;
        kfree(mach);
out:
        return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
                                       struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;

        bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
                    KVM_S390_VM_CPU_FEAT_NR_BITS);
        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;

        bitmap_copy((unsigned long *) data.feat,
                    kvm_s390_available_cpu_feat,
                    KVM_S390_VM_CPU_FEAT_NR_BITS);
        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        /*
         * Once we can actually configure subfunctions (kernel + hw support),
         * we have to check if they were already set by user space, if so copy
         * them from kvm->arch.
         */
        return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
                                        struct kvm_device_attr *attr)
{
        if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
            sizeof(struct kvm_s390_vm_cpu_subfunc)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret = -ENXIO;

        switch (attr->attr) {
        case KVM_S390_VM_CPU_PROCESSOR:
                ret = kvm_s390_get_processor(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE:
                ret = kvm_s390_get_machine(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                ret = kvm_s390_get_processor_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE_FEAT:
                ret = kvm_s390_get_machine_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                ret = kvm_s390_get_processor_subfunc(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
                ret = kvm_s390_get_machine_subfunc(kvm, attr);
                break;
        }
        return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                ret = kvm_s390_set_mem_control(kvm, attr);
                break;
        case KVM_S390_VM_TOD:
                ret = kvm_s390_set_tod(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MODEL:
                ret = kvm_s390_set_cpu_model(kvm, attr);
                break;
        case KVM_S390_VM_CRYPTO:
                ret = kvm_s390_vm_set_crypto(kvm, attr);
                break;
        case KVM_S390_VM_MIGRATION:
                ret = kvm_s390_vm_set_migration(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                ret = kvm_s390_get_mem_control(kvm, attr);
                break;
        case KVM_S390_VM_TOD:
                ret = kvm_s390_get_tod(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MODEL:
                ret = kvm_s390_get_cpu_model(kvm, attr);
                break;
        case KVM_S390_VM_MIGRATION:
                ret = kvm_s390_vm_get_migration(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                switch (attr->attr) {
                case KVM_S390_VM_MEM_ENABLE_CMMA:
                case KVM_S390_VM_MEM_CLR_CMMA:
                        ret = sclp.has_cmma ? 0 : -ENXIO;
                        break;
                case KVM_S390_VM_MEM_LIMIT_SIZE:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_TOD:
                switch (attr->attr) {
                case KVM_S390_VM_TOD_LOW:
                case KVM_S390_VM_TOD_HIGH:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_CPU_MODEL:
                switch (attr->attr) {
                case KVM_S390_VM_CPU_PROCESSOR:
                case KVM_S390_VM_CPU_MACHINE:
                case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                case KVM_S390_VM_CPU_MACHINE_FEAT:
                case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
                        ret = 0;
                        break;
                /* configuring subfunctions is not supported yet */
                case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_CRYPTO:
                switch (attr->attr) {
                case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_MIGRATION:
                ret = 0;
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

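/*
 * Read the guest storage keys for a range of guest frames into a user
 * buffer. Returns KVM_S390_GET_SKEYS_NONE if the guest never enabled
 * storage key handling.
 */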
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
        uint8_t *keys;
        uint64_t hva;
        int srcu_idx, i, r = 0;

        if (args->flags != 0)
                return -EINVAL;

        /* Is this guest using storage keys? */
        if (!mm_use_skey(current->mm))
                return KVM_S390_GET_SKEYS_NONE;

        /* Enforce sane limit on memory allocation */
        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
                return -EINVAL;

        keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
        if (!keys)
                return -ENOMEM;

        down_read(&current->mm->mmap_sem);
        srcu_idx = srcu_read_lock(&kvm->srcu);
        for (i = 0; i < args->count; i++) {
                hva = gfn_to_hva(kvm, args->start_gfn + i);
                if (kvm_is_error_hva(hva)) {
                        r = -EFAULT;
                        break;
                }

                r = get_guest_storage_key(current->mm, hva, &keys[i]);
                if (r)
                        break;
        }
        srcu_read_unlock(&kvm->srcu, srcu_idx);
        up_read(&current->mm->mmap_sem);

        if (!r) {
                r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
                                 sizeof(uint8_t) * args->count);
                if (r)
                        r = -EFAULT;
        }

        kvfree(keys);
        return r;
}

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
        uint8_t *keys;
        uint64_t hva;
        int srcu_idx, i, r = 0;

        if (args->flags != 0)
                return -EINVAL;

        /* Enforce sane limit on memory allocation */
        if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
                return -EINVAL;

        keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
        if (!keys)
                return -ENOMEM;

        r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
                           sizeof(uint8_t) * args->count);
        if (r) {
                r = -EFAULT;
                goto out;
        }

        /* Enable storage key handling for the guest */
        r = s390_enable_skey();
        if (r)
                goto out;

        down_read(&current->mm->mmap_sem);
        srcu_idx = srcu_read_lock(&kvm->srcu);
        for (i = 0; i < args->count; i++) {
                hva = gfn_to_hva(kvm, args->start_gfn + i);
                if (kvm_is_error_hva(hva)) {
                        r = -EFAULT;
                        break;
                }

                /* Lowest order bit is reserved */
                if (keys[i] & 0x01) {
                        r = -EINVAL;
                        break;
                }

                r = set_guest_storage_key(current->mm, hva, keys[i], 0);
                if (r)
                        break;
        }
        srcu_read_unlock(&kvm->srcu, srcu_idx);
        up_read(&current->mm->mmap_sem);
out:
        kvfree(keys);
        return r;
}

/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1503 /* the CMMA buffer size limit mirrors KVM_S390_SKEYS_MAX, for consistency */
1504 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
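/*
 * Worked example for the constant above: on s390x, sizeof(void *) is 8,
 * so KVM_S390_MAX_BIT_DISTANCE is 16. Each page contributes one byte to
 * the output, so a run of up to 16 clean pages costs no more to transmit
 * inline than the two-long (16 byte) header that a new block would need.
 */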
1505
1506 /*
1507  * This function searches for the next page with dirty CMMA attributes, and
1508  * saves the attributes in the buffer up to either the end of the buffer or
1509  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1510  * no trailing clean bytes are saved.
1511  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1512  * output buffer will indicate 0 as length.
1513  */
1514 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1515                                   struct kvm_s390_cmma_log *args)
1516 {
1517         struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1518         unsigned long bufsize, hva, pgstev, i, next, cur;
1519         int srcu_idx, peek, r = 0, rr;
1520         u8 *res;
1521
1522         cur = args->start_gfn;
1523         i = next = pgstev = 0;
1524
1525         if (unlikely(!kvm->arch.use_cmma))
1526                 return -ENXIO;
1527         /* Invalid/unsupported flags were specified */
1528         if (args->flags & ~KVM_S390_CMMA_PEEK)
1529                 return -EINVAL;
1530         /* Migration mode query, and we are not doing a migration */
1531         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1532         if (!peek && !s)
1533                 return -EINVAL;
1534         /* CMMA is disabled or was not used, or the buffer has length zero */
1535         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1536         if (!bufsize || !kvm->mm->context.use_cmma) {
1537                 memset(args, 0, sizeof(*args));
1538                 return 0;
1539         }
1540
1541         if (!peek) {
1542                 /* We are not peeking, and there are no dirty pages */
1543                 if (!atomic64_read(&s->dirty_pages)) {
1544                         memset(args, 0, sizeof(*args));
1545                         return 0;
1546                 }
1547                 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1548                                     args->start_gfn);
1549                 if (cur >= s->bitmap_size)      /* nothing found, loop back */
1550                         cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1551                 if (cur >= s->bitmap_size) {    /* again! (very unlikely) */
1552                         memset(args, 0, sizeof(*args));
1553                         return 0;
1554                 }
1555                 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1556         }
1557
1558         res = vmalloc(bufsize);
1559         if (!res)
1560                 return -ENOMEM;
1561
1562         args->start_gfn = cur;
1563
1564         down_read(&kvm->mm->mmap_sem);
1565         srcu_idx = srcu_read_lock(&kvm->srcu);
1566         while (i < bufsize) {
1567                 hva = gfn_to_hva(kvm, cur);
1568                 if (kvm_is_error_hva(hva)) {
1569                         r = -EFAULT;
1570                         break;
1571                 }
1572                 /* decrement only if we actually flipped the bit to 0 */
1573                 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1574                         atomic64_dec(&s->dirty_pages);
1575                 r = get_pgste(kvm->mm, hva, &pgstev);
1576                 if (r < 0)
1577                         pgstev = 0;
1578                 /* save the value */
1579                 res[i++] = (pgstev >> 24) & 0x43;
1580                 /*
1581                  * If the next dirty bit is too far away, stop.
1582                  * If we reached the previous "next", look up the following one.
1583                  */
1584                 if (!peek) {
1585                         if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1586                                 break;
1587                         if (cur == next)
1588                                 next = find_next_bit(s->pgste_bitmap,
1589                                                      s->bitmap_size, cur + 1);
1590                         /* reached the end of the bitmap or of the buffer, stop */
1591                         if ((next >= s->bitmap_size) ||
1592                             (next >= args->start_gfn + bufsize))
1593                                 break;
1594                 }
1595                 cur++;
1596         }
1597         srcu_read_unlock(&kvm->srcu, srcu_idx);
1598         up_read(&kvm->mm->mmap_sem);
1599         args->count = i;
1600         args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1601
1602         rr = copy_to_user((void __user *)args->values, res, args->count);
1603         if (rr)
1604                 r = -EFAULT;
1605
1606         vfree(res);
1607         return r;
1608 }
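/*
 * Illustrative userspace sketch (not part of this file): one step of a
 * migration loop driving the ioctl above. "vm_fd", "buf" and "next_gfn"
 * are assumed names:
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = next_gfn,
 *		.count = sizeof(buf),
 *		.flags = 0,		// or KVM_S390_CMMA_PEEK
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *	if (!ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log)) {
 *		// log.count values were stored, starting at log.start_gfn
 *		// (which the kernel may have advanced); log.remaining is
 *		// the number of still-dirty pages.
 *		next_gfn = log.start_gfn + log.count;
 *	}
 */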
1609
1610 /*
1611  * This function sets the CMMA attributes for the given pages. If the input
1612  * buffer has zero length, no action is taken, otherwise the attributes are
1613  * set and the mm->context.use_cmma flag is set.
1614  */
1615 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1616                                   const struct kvm_s390_cmma_log *args)
1617 {
1618         unsigned long hva, mask, pgstev, i;
1619         uint8_t *bits;
1620         int srcu_idx, r = 0;
1621
1622         mask = args->mask;
1623
1624         if (!kvm->arch.use_cmma)
1625                 return -ENXIO;
1626         /* invalid/unsupported flags */
1627         if (args->flags != 0)
1628                 return -EINVAL;
1629         /* Enforce sane limit on memory allocation */
1630         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1631                 return -EINVAL;
1632         /* Nothing to do */
1633         if (args->count == 0)
1634                 return 0;
1635
1636         bits = vmalloc(sizeof(*bits) * args->count);
1637         if (!bits)
1638                 return -ENOMEM;
1639
1640         r = copy_from_user(bits, (void __user *)args->values, args->count);
1641         if (r) {
1642                 r = -EFAULT;
1643                 goto out;
1644         }
1645
1646         down_read(&kvm->mm->mmap_sem);
1647         srcu_idx = srcu_read_lock(&kvm->srcu);
1648         for (i = 0; i < args->count; i++) {
1649                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1650                 if (kvm_is_error_hva(hva)) {
1651                         r = -EFAULT;
1652                         break;
1653                 }
1654
1655                 pgstev = bits[i];
1656                 pgstev = pgstev << 24;
1657                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1658                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1659         }
1660         srcu_read_unlock(&kvm->srcu, srcu_idx);
1661         up_read(&kvm->mm->mmap_sem);
1662
1663         if (!kvm->mm->context.use_cmma) {
1664                 down_write(&kvm->mm->mmap_sem);
1665                 kvm->mm->context.use_cmma = 1;
1666                 up_write(&kvm->mm->mmap_sem);
1667         }
1668 out:
1669         vfree(bits);
1670         return r;
1671 }
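/*
 * Note on the encoding used above: each input byte is the guest page
 * state as it sits in the PGSTE, so it is shifted left by 24 to line up
 * with the usage-state and NODAT bits; the mask is clamped to exactly
 * those bits so userspace cannot modify any other part of the PGSTE.
 */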
1672
1673 long kvm_arch_vm_ioctl(struct file *filp,
1674                        unsigned int ioctl, unsigned long arg)
1675 {
1676         struct kvm *kvm = filp->private_data;
1677         void __user *argp = (void __user *)arg;
1678         struct kvm_device_attr attr;
1679         int r;
1680
1681         switch (ioctl) {
1682         case KVM_S390_INTERRUPT: {
1683                 struct kvm_s390_interrupt s390int;
1684
1685                 r = -EFAULT;
1686                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1687                         break;
1688                 r = kvm_s390_inject_vm(kvm, &s390int);
1689                 break;
1690         }
1691         case KVM_ENABLE_CAP: {
1692                 struct kvm_enable_cap cap;
1693                 r = -EFAULT;
1694                 if (copy_from_user(&cap, argp, sizeof(cap)))
1695                         break;
1696                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1697                 break;
1698         }
1699         case KVM_CREATE_IRQCHIP: {
1700                 struct kvm_irq_routing_entry routing;
1701
1702                 r = -EINVAL;
1703                 if (kvm->arch.use_irqchip) {
1704                         /* Set up dummy routing. */
1705                         memset(&routing, 0, sizeof(routing));
1706                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1707                 }
1708                 break;
1709         }
1710         case KVM_SET_DEVICE_ATTR: {
1711                 r = -EFAULT;
1712                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1713                         break;
1714                 r = kvm_s390_vm_set_attr(kvm, &attr);
1715                 break;
1716         }
1717         case KVM_GET_DEVICE_ATTR: {
1718                 r = -EFAULT;
1719                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1720                         break;
1721                 r = kvm_s390_vm_get_attr(kvm, &attr);
1722                 break;
1723         }
1724         case KVM_HAS_DEVICE_ATTR: {
1725                 r = -EFAULT;
1726                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1727                         break;
1728                 r = kvm_s390_vm_has_attr(kvm, &attr);
1729                 break;
1730         }
1731         case KVM_S390_GET_SKEYS: {
1732                 struct kvm_s390_skeys args;
1733
1734                 r = -EFAULT;
1735                 if (copy_from_user(&args, argp,
1736                                    sizeof(struct kvm_s390_skeys)))
1737                         break;
1738                 r = kvm_s390_get_skeys(kvm, &args);
1739                 break;
1740         }
1741         case KVM_S390_SET_SKEYS: {
1742                 struct kvm_s390_skeys args;
1743
1744                 r = -EFAULT;
1745                 if (copy_from_user(&args, argp,
1746                                    sizeof(struct kvm_s390_skeys)))
1747                         break;
1748                 r = kvm_s390_set_skeys(kvm, &args);
1749                 break;
1750         }
1751         case KVM_S390_GET_CMMA_BITS: {
1752                 struct kvm_s390_cmma_log args;
1753
1754                 r = -EFAULT;
1755                 if (copy_from_user(&args, argp, sizeof(args)))
1756                         break;
1757                 r = kvm_s390_get_cmma_bits(kvm, &args);
1758                 if (!r) {
1759                         r = copy_to_user(argp, &args, sizeof(args));
1760                         if (r)
1761                                 r = -EFAULT;
1762                 }
1763                 break;
1764         }
1765         case KVM_S390_SET_CMMA_BITS: {
1766                 struct kvm_s390_cmma_log args;
1767
1768                 r = -EFAULT;
1769                 if (copy_from_user(&args, argp, sizeof(args)))
1770                         break;
1771                 r = kvm_s390_set_cmma_bits(kvm, &args);
1772                 break;
1773         }
1774         default:
1775                 r = -ENOTTY;
1776         }
1777
1778         return r;
1779 }
1780
1781 static int kvm_s390_query_ap_config(u8 *config)
1782 {
1783         u32 fcn_code = 0x04000000UL;
1784         u32 cc = 0;
1785
1786         memset(config, 0, 128);
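        /*
         * PQAP with function code 0x04 (QCI) stores 128 bytes of AP
         * configuration info at the address passed in GR2, which is why
         * the buffer is zeroed first. The EX_TABLE fixup lets the query
         * fail gracefully if the instruction cannot be executed: the
         * caller then simply sees cc == 0 with an all-zero buffer.
         */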
1787         asm volatile(
1788                 "lgr 0,%1\n"
1789                 "lgr 2,%2\n"
1790                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1791                 "0: ipm %0\n"
1792                 "srl %0,28\n"
1793                 "1:\n"
1794                 EX_TABLE(0b, 1b)
1795                 : "+r" (cc)
1796                 : "r" (fcn_code), "r" (config)
1797                 : "cc", "0", "2", "memory"
1798         );
1799
1800         return cc;
1801 }
1802
1803 static int kvm_s390_apxa_installed(void)
1804 {
1805         u8 config[128];
1806         int cc;
1807
1808         if (test_facility(12)) {
1809                 cc = kvm_s390_query_ap_config(config);
1810
1811                 if (cc)
1812                         pr_err("PQAP(QCI) failed with cc=%d\n", cc);
1813                 else
1814                         return config[0] & 0x40;
1815         }
1816
1817         return 0;
1818 }
1819
1820 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1821 {
1822         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1823
1824         if (kvm_s390_apxa_installed())
1825                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1826         else
1827                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1828 }
1829
1830 static u64 kvm_s390_get_initial_cpuid(void)
1831 {
1832         struct cpuid cpuid;
1833
1834         get_cpu_id(&cpuid);
1835         cpuid.version = 0xff;
1836         return *((u64 *) &cpuid);
1837 }
1838
1839 static void kvm_s390_crypto_init(struct kvm *kvm)
1840 {
1841         if (!test_kvm_facility(kvm, 76))
1842                 return;
1843
1844         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1845         kvm_s390_set_crycb_format(kvm);
1846
1847         /* Enable AES/DEA protected key functions by default */
1848         kvm->arch.crypto.aes_kw = 1;
1849         kvm->arch.crypto.dea_kw = 1;
1850         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1851                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1852         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1853                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1854 }
1855
1856 static void sca_dispose(struct kvm *kvm)
1857 {
1858         if (kvm->arch.use_esca)
1859                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1860         else
1861                 free_page((unsigned long)(kvm->arch.sca));
1862         kvm->arch.sca = NULL;
1863 }
1864
1865 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1866 {
1867         gfp_t alloc_flags = GFP_KERNEL;
1868         int i, rc;
1869         char debug_name[16];
1870         static unsigned long sca_offset;
1871
1872         rc = -EINVAL;
1873 #ifdef CONFIG_KVM_S390_UCONTROL
1874         if (type & ~KVM_VM_S390_UCONTROL)
1875                 goto out_err;
1876         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1877                 goto out_err;
1878 #else
1879         if (type)
1880                 goto out_err;
1881 #endif
1882
1883         rc = s390_enable_sie();
1884         if (rc)
1885                 goto out_err;
1886
1887         rc = -ENOMEM;
1888
1889         kvm->arch.use_esca = 0; /* start with basic SCA */
1890         if (!sclp.has_64bscao)
1891                 alloc_flags |= GFP_DMA;
1892         rwlock_init(&kvm->arch.sca_lock);
1893         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1894         if (!kvm->arch.sca)
1895                 goto out_err;
1896         spin_lock(&kvm_lock);
1897         sca_offset += 16;
1898         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1899                 sca_offset = 0;
1900         kvm->arch.sca = (struct bsca_block *)
1901                         ((char *) kvm->arch.sca + sca_offset);
1902         spin_unlock(&kvm_lock);
1903
1904         sprintf(debug_name, "kvm-%u", current->pid);
1905
1906         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1907         if (!kvm->arch.dbf)
1908                 goto out_err;
1909
1910         kvm->arch.sie_page2 =
1911              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1912         if (!kvm->arch.sie_page2)
1913                 goto out_err;
1914
1915         /* Populate the facility mask initially. */
1916         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1917                sizeof(S390_lowcore.stfle_fac_list));
1918         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1919                 if (i < kvm_s390_fac_list_mask_size())
1920                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1921                 else
1922                         kvm->arch.model.fac_mask[i] = 0UL;
1923         }
1924
1925         /* Populate the facility list initially. */
1926         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1927         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1928                S390_ARCH_FAC_LIST_SIZE_BYTE);
1929
1930         /* we are always in czam mode - even on pre-z14 machines */
1931         set_kvm_facility(kvm->arch.model.fac_mask, 138);
1932         set_kvm_facility(kvm->arch.model.fac_list, 138);
1933         /* we emulate STHYI in kvm */
1934         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1935         set_kvm_facility(kvm->arch.model.fac_list, 74);
1936         if (MACHINE_HAS_TLB_GUEST) {
1937                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
1938                 set_kvm_facility(kvm->arch.model.fac_list, 147);
1939         }
1940
1941         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1942         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1943
1944         kvm_s390_crypto_init(kvm);
1945
1946         mutex_init(&kvm->arch.float_int.ais_lock);
1947         kvm->arch.float_int.simm = 0;
1948         kvm->arch.float_int.nimm = 0;
1949         spin_lock_init(&kvm->arch.float_int.lock);
1950         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1951                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1952         init_waitqueue_head(&kvm->arch.ipte_wq);
1953         mutex_init(&kvm->arch.ipte_mutex);
1954
1955         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1956         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1957
1958         if (type & KVM_VM_S390_UCONTROL) {
1959                 kvm->arch.gmap = NULL;
1960                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1961         } else {
1962                 if (sclp.hamax == U64_MAX)
1963                         kvm->arch.mem_limit = TASK_SIZE_MAX;
1964                 else
1965                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1966                                                     sclp.hamax + 1);
1967                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1968                 if (!kvm->arch.gmap)
1969                         goto out_err;
1970                 kvm->arch.gmap->private = kvm;
1971                 kvm->arch.gmap->pfault_enabled = 0;
1972         }
1973
1974         kvm->arch.css_support = 0;
1975         kvm->arch.use_irqchip = 0;
1976         kvm->arch.epoch = 0;
1977
1978         spin_lock_init(&kvm->arch.start_stop_lock);
1979         kvm_s390_vsie_init(kvm);
1980         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1981
1982         return 0;
1983 out_err:
1984         free_page((unsigned long)kvm->arch.sie_page2);
1985         debug_unregister(kvm->arch.dbf);
1986         sca_dispose(kvm);
1987         KVM_EVENT(3, "creation of vm failed: %d", rc);
1988         return rc;
1989 }
1990
1991 bool kvm_arch_has_vcpu_debugfs(void)
1992 {
1993         return false;
1994 }
1995
1996 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1997 {
1998         return 0;
1999 }
2000
2001 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2002 {
2003         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2004         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2005         kvm_s390_clear_local_irqs(vcpu);
2006         kvm_clear_async_pf_completion_queue(vcpu);
2007         if (!kvm_is_ucontrol(vcpu->kvm))
2008                 sca_del_vcpu(vcpu);
2009
2010         if (kvm_is_ucontrol(vcpu->kvm))
2011                 gmap_remove(vcpu->arch.gmap);
2012
2013         if (vcpu->kvm->arch.use_cmma)
2014                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2015         free_page((unsigned long)(vcpu->arch.sie_block));
2016
2017         kvm_vcpu_uninit(vcpu);
2018         kmem_cache_free(kvm_vcpu_cache, vcpu);
2019 }
2020
2021 static void kvm_free_vcpus(struct kvm *kvm)
2022 {
2023         unsigned int i;
2024         struct kvm_vcpu *vcpu;
2025
2026         kvm_for_each_vcpu(i, vcpu, kvm)
2027                 kvm_arch_vcpu_destroy(vcpu);
2028
2029         mutex_lock(&kvm->lock);
2030         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2031                 kvm->vcpus[i] = NULL;
2032
2033         atomic_set(&kvm->online_vcpus, 0);
2034         mutex_unlock(&kvm->lock);
2035 }
2036
2037 void kvm_arch_destroy_vm(struct kvm *kvm)
2038 {
2039         kvm_free_vcpus(kvm);
2040         sca_dispose(kvm);
2041         debug_unregister(kvm->arch.dbf);
2042         free_page((unsigned long)kvm->arch.sie_page2);
2043         if (!kvm_is_ucontrol(kvm))
2044                 gmap_remove(kvm->arch.gmap);
2045         kvm_s390_destroy_adapters(kvm);
2046         kvm_s390_clear_float_irqs(kvm);
2047         kvm_s390_vsie_destroy(kvm);
2048         if (kvm->arch.migration_state) {
2049                 vfree(kvm->arch.migration_state->pgste_bitmap);
2050                 kfree(kvm->arch.migration_state);
2051         }
2052         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2053 }
2054
2055 /* Section: vcpu related */
2056 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2057 {
2058         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2059         if (!vcpu->arch.gmap)
2060                 return -ENOMEM;
2061         vcpu->arch.gmap->private = vcpu->kvm;
2062
2063         return 0;
2064 }
2065
2066 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2067 {
2068         if (!kvm_s390_use_sca_entries())
2069                 return;
2070         read_lock(&vcpu->kvm->arch.sca_lock);
2071         if (vcpu->kvm->arch.use_esca) {
2072                 struct esca_block *sca = vcpu->kvm->arch.sca;
2073
2074                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2075                 sca->cpu[vcpu->vcpu_id].sda = 0;
2076         } else {
2077                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2078
2079                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2080                 sca->cpu[vcpu->vcpu_id].sda = 0;
2081         }
2082         read_unlock(&vcpu->kvm->arch.sca_lock);
2083 }
2084
2085 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2086 {
2087         if (!kvm_s390_use_sca_entries()) {
2088                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2089
2090                 /* we still need the basic sca for the ipte control */
2091                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2092                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2093         }
2094         read_lock(&vcpu->kvm->arch.sca_lock);
2095         if (vcpu->kvm->arch.use_esca) {
2096                 struct esca_block *sca = vcpu->kvm->arch.sca;
2097
2098                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2099                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2100                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2101                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2102                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2103         } else {
2104                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2105
2106                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2107                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2108                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2109                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2110         }
2111         read_unlock(&vcpu->kvm->arch.sca_lock);
2112 }
2113
2114 /* Basic SCA to Extended SCA data copy routines */
2115 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2116 {
2117         d->sda = s->sda;
2118         d->sigp_ctrl.c = s->sigp_ctrl.c;
2119         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2120 }
2121
2122 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2123 {
2124         int i;
2125
2126         d->ipte_control = s->ipte_control;
2127         d->mcn[0] = s->mcn;
2128         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2129                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2130 }
2131
2132 static int sca_switch_to_extended(struct kvm *kvm)
2133 {
2134         struct bsca_block *old_sca = kvm->arch.sca;
2135         struct esca_block *new_sca;
2136         struct kvm_vcpu *vcpu;
2137         unsigned int vcpu_idx;
2138         u32 scaol, scaoh;
2139
2140         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2141         if (!new_sca)
2142                 return -ENOMEM;
2143
2144         scaoh = (u32)((u64)(new_sca) >> 32);
2145         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2146
2147         kvm_s390_vcpu_block_all(kvm);
2148         write_lock(&kvm->arch.sca_lock);
2149
2150         sca_copy_b_to_e(new_sca, old_sca);
2151
2152         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2153                 vcpu->arch.sie_block->scaoh = scaoh;
2154                 vcpu->arch.sie_block->scaol = scaol;
2155                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2156         }
2157         kvm->arch.sca = new_sca;
2158         kvm->arch.use_esca = 1;
2159
2160         write_unlock(&kvm->arch.sca_lock);
2161         kvm_s390_vcpu_unblock_all(kvm);
2162
2163         free_page((unsigned long)old_sca);
2164
2165         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2166                  old_sca, kvm->arch.sca);
2167         return 0;
2168 }
2169
2170 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2171 {
2172         int rc;
2173
2174         if (!kvm_s390_use_sca_entries()) {
2175                 if (id < KVM_MAX_VCPUS)
2176                         return true;
2177                 return false;
2178         }
2179         if (id < KVM_S390_BSCA_CPU_SLOTS)
2180                 return true;
2181         if (!sclp.has_esca || !sclp.has_64bscao)
2182                 return false;
2183
2184         mutex_lock(&kvm->lock);
2185         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2186         mutex_unlock(&kvm->lock);
2187
2188         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2189 }
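/*
 * Summary of the policy above, with slot counts from the architecture
 * headers: a basic SCA holds KVM_S390_BSCA_CPU_SLOTS (64) entries, an
 * extended SCA holds KVM_S390_ESCA_CPU_SLOTS (248). Creating a vcpu with
 * an id of 64 or higher therefore triggers a one-time live switch to the
 * ESCA, provided the machine has both ESCA and the 64-bit SCA origin.
 */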
2190
2191 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2192 {
2193         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2194         kvm_clear_async_pf_completion_queue(vcpu);
2195         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2196                                     KVM_SYNC_GPRS |
2197                                     KVM_SYNC_ACRS |
2198                                     KVM_SYNC_CRS |
2199                                     KVM_SYNC_ARCH0 |
2200                                     KVM_SYNC_PFAULT;
2201         kvm_s390_set_prefix(vcpu, 0);
2202         if (test_kvm_facility(vcpu->kvm, 64))
2203                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2204         if (test_kvm_facility(vcpu->kvm, 82))
2205                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2206         if (test_kvm_facility(vcpu->kvm, 133))
2207                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2208         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2209          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2210          */
2211         if (MACHINE_HAS_VX)
2212                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2213         else
2214                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2215
2216         if (kvm_is_ucontrol(vcpu->kvm))
2217                 return __kvm_ucontrol_vcpu_init(vcpu);
2218
2219         return 0;
2220 }
2221
2222 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2223 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2224 {
2225         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2226         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2227         vcpu->arch.cputm_start = get_tod_clock_fast();
2228         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2229 }
2230
2231 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2232 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2233 {
2234         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2235         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2236         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2237         vcpu->arch.cputm_start = 0;
2238         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2239 }
2240
2241 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2242 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2243 {
2244         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2245         vcpu->arch.cputm_enabled = true;
2246         __start_cpu_timer_accounting(vcpu);
2247 }
2248
2249 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2250 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2251 {
2252         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2253         __stop_cpu_timer_accounting(vcpu);
2254         vcpu->arch.cputm_enabled = false;
2255 }
2256
2257 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2258 {
2259         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2260         __enable_cpu_timer_accounting(vcpu);
2261         preempt_enable();
2262 }
2263
2264 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2265 {
2266         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2267         __disable_cpu_timer_accounting(vcpu);
2268         preempt_enable();
2269 }
2270
2271 /* set the cpu timer - may only be called from the VCPU thread itself */
2272 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2273 {
2274         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2275         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2276         if (vcpu->arch.cputm_enabled)
2277                 vcpu->arch.cputm_start = get_tod_clock_fast();
2278         vcpu->arch.sie_block->cputm = cputm;
2279         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2280         preempt_enable();
2281 }
2282
2283 /* update and get the cpu timer - can also be called from other VCPU threads */
2284 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2285 {
2286         unsigned int seq;
2287         __u64 value;
2288
2289         if (unlikely(!vcpu->arch.cputm_enabled))
2290                 return vcpu->arch.sie_block->cputm;
2291
2292         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2293         do {
2294                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2295                 /*
2296                  * If the writer would ever execute a read in the critical
2297                  * section, e.g. in irq context, we have a deadlock.
2298                  */
2299                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2300                 value = vcpu->arch.sie_block->cputm;
2301                 /* if cputm_start is 0, accounting is being started/stopped */
2302                 if (likely(vcpu->arch.cputm_start))
2303                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2304         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2305         preempt_enable();
2306         return value;
2307 }
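/*
 * Example of how the seqcount above protects readers: if the VCPU thread
 * calls kvm_s390_set_cpu_timer() while another thread is in the retry
 * loop, the odd sequence value forces that reader to retry, so it can
 * never combine an old cputm with a new cputm_start (or vice versa).
 */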
2308
2309 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2310 {
2311
2312         gmap_enable(vcpu->arch.enabled_gmap);
2313         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2314         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2315                 __start_cpu_timer_accounting(vcpu);
2316         vcpu->cpu = cpu;
2317 }
2318
2319 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2320 {
2321         vcpu->cpu = -1;
2322         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2323                 __stop_cpu_timer_accounting(vcpu);
2324         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2325         vcpu->arch.enabled_gmap = gmap_get_enabled();
2326         gmap_disable(vcpu->arch.enabled_gmap);
2327
2328 }
2329
2330 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2331 {
2332         /* this equals the initial cpu reset in the POP, but we don't switch to ESA */
2333         vcpu->arch.sie_block->gpsw.mask = 0UL;
2334         vcpu->arch.sie_block->gpsw.addr = 0UL;
2335         kvm_s390_set_prefix(vcpu, 0);
2336         kvm_s390_set_cpu_timer(vcpu, 0);
2337         vcpu->arch.sie_block->ckc       = 0UL;
2338         vcpu->arch.sie_block->todpr     = 0;
2339         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2340         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2341         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2342         /* make sure the new fpc will be lazily loaded */
2343         save_fpu_regs();
2344         current->thread.fpu.fpc = 0;
2345         vcpu->arch.sie_block->gbea = 1;
2346         vcpu->arch.sie_block->pp = 0;
2347         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2348         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2349         kvm_clear_async_pf_completion_queue(vcpu);
2350         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2351                 kvm_s390_vcpu_stop(vcpu);
2352         kvm_s390_clear_local_irqs(vcpu);
2353 }
2354
2355 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2356 {
2357         mutex_lock(&vcpu->kvm->lock);
2358         preempt_disable();
2359         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2360         preempt_enable();
2361         mutex_unlock(&vcpu->kvm->lock);
2362         if (!kvm_is_ucontrol(vcpu->kvm)) {
2363                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2364                 sca_add_vcpu(vcpu);
2365         }
2366         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2367                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2368         /* make vcpu_load load the right gmap on the first trigger */
2369         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2370 }
2371
2372 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2373 {
2374         if (!test_kvm_facility(vcpu->kvm, 76))
2375                 return;
2376
2377         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2378
2379         if (vcpu->kvm->arch.crypto.aes_kw)
2380                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2381         if (vcpu->kvm->arch.crypto.dea_kw)
2382                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2383
2384         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2385 }
2386
2387 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2388 {
2389         free_page(vcpu->arch.sie_block->cbrlo);
2390         vcpu->arch.sie_block->cbrlo = 0;
2391 }
2392
2393 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2394 {
2395         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2396         if (!vcpu->arch.sie_block->cbrlo)
2397                 return -ENOMEM;
2398
2399         vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2400         return 0;
2401 }
2402
2403 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2404 {
2405         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2406
2407         vcpu->arch.sie_block->ibc = model->ibc;
2408         if (test_kvm_facility(vcpu->kvm, 7))
2409                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2410 }
2411
2412 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2413 {
2414         int rc = 0;
2415
2416         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2417                                                     CPUSTAT_SM |
2418                                                     CPUSTAT_STOPPED);
2419
2420         if (test_kvm_facility(vcpu->kvm, 78))
2421                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2422         else if (test_kvm_facility(vcpu->kvm, 8))
2423                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2424
2425         kvm_s390_vcpu_setup_model(vcpu);
2426
2427         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2428         if (MACHINE_HAS_ESOP)
2429                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2430         if (test_kvm_facility(vcpu->kvm, 9))
2431                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2432         if (test_kvm_facility(vcpu->kvm, 73))
2433                 vcpu->arch.sie_block->ecb |= ECB_TE;
2434
2435         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2436                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2437         if (test_kvm_facility(vcpu->kvm, 130))
2438                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2439         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2440         if (sclp.has_cei)
2441                 vcpu->arch.sie_block->eca |= ECA_CEI;
2442         if (sclp.has_ib)
2443                 vcpu->arch.sie_block->eca |= ECA_IB;
2444         if (sclp.has_siif)
2445                 vcpu->arch.sie_block->eca |= ECA_SII;
2446         if (sclp.has_sigpif)
2447                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2448         if (test_kvm_facility(vcpu->kvm, 129)) {
2449                 vcpu->arch.sie_block->eca |= ECA_VX;
2450                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2451         }
2452         if (test_kvm_facility(vcpu->kvm, 139))
2453                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2454
2455         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2456                                         | SDNXC;
2457         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2458
2459         if (sclp.has_kss)
2460                 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2461         else
2462                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2463
2464         if (vcpu->kvm->arch.use_cmma) {
2465                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2466                 if (rc)
2467                         return rc;
2468         }
2469         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2470         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2471
2472         kvm_s390_vcpu_crypto_setup(vcpu);
2473
2474         return rc;
2475 }
2476
2477 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2478                                       unsigned int id)
2479 {
2480         struct kvm_vcpu *vcpu;
2481         struct sie_page *sie_page;
2482         int rc = -EINVAL;
2483
2484         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2485                 goto out;
2486
2487         rc = -ENOMEM;
2488
2489         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2490         if (!vcpu)
2491                 goto out;
2492
2493         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2494         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2495         if (!sie_page)
2496                 goto out_free_cpu;
2497
2498         vcpu->arch.sie_block = &sie_page->sie_block;
2499         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2500
2501         /* the real guest size will always be smaller than msl */
2502         vcpu->arch.sie_block->mso = 0;
2503         vcpu->arch.sie_block->msl = sclp.hamax;
2504
2505         vcpu->arch.sie_block->icpua = id;
2506         spin_lock_init(&vcpu->arch.local_int.lock);
2507         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2508         vcpu->arch.local_int.wq = &vcpu->wq;
2509         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2510         seqcount_init(&vcpu->arch.cputm_seqcount);
2511
2512         rc = kvm_vcpu_init(vcpu, kvm, id);
2513         if (rc)
2514                 goto out_free_sie_block;
2515         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2516                  vcpu->arch.sie_block);
2517         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2518
2519         return vcpu;
2520 out_free_sie_block:
2521         free_page((unsigned long)(vcpu->arch.sie_block));
2522 out_free_cpu:
2523         kmem_cache_free(kvm_vcpu_cache, vcpu);
2524 out:
2525         return ERR_PTR(rc);
2526 }
2527
2528 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2529 {
2530         return kvm_s390_vcpu_has_irq(vcpu, 0);
2531 }
2532
2533 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2534 {
2535         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2536 }
2537
2538 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2539 {
2540         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2541         exit_sie(vcpu);
2542 }
2543
2544 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2545 {
2546         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2547 }
2548
2549 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2550 {
2551         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2552         exit_sie(vcpu);
2553 }
2554
2555 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2556 {
2557         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2558 }
2559
2560 /*
2561  * Kick a guest cpu out of SIE and wait until SIE is not running. If
2562  * the CPU is not running (e.g. waiting as idle) it returns immediately.
2563  */
2564 void exit_sie(struct kvm_vcpu *vcpu)
2565 {
2566         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2567         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2568                 cpu_relax();
2569 }
2570
2571 /* Kick a guest cpu out of SIE to process a request synchronously */
2572 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2573 {
2574         kvm_make_request(req, vcpu);
2575         kvm_s390_vcpu_request(vcpu);
2576 }
2577
2578 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2579                               unsigned long end)
2580 {
2581         struct kvm *kvm = gmap->private;
2582         struct kvm_vcpu *vcpu;
2583         unsigned long prefix;
2584         int i;
2585
2586         if (gmap_is_shadow(gmap))
2587                 return;
2588         if (start >= 1UL << 31)
2589                 /* We are only interested in prefix pages */
2590                 return;
2591         kvm_for_each_vcpu(i, vcpu, kvm) {
2592                 /* match against both prefix pages */
2593                 prefix = kvm_s390_get_prefix(vcpu);
2594                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2595                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2596                                    start, end);
2597                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2598                 }
2599         }
2600 }
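/*
 * The prefix area of a vcpu spans two pages (the guest lowcore), hence
 * the "prefix + 2*PAGE_SIZE - 1" overlap check above: any invalidation
 * touching either prefix page triggers KVM_REQ_MMU_RELOAD, so the ipte
 * notifier is re-armed before the vcpu re-enters SIE.
 */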
2601
2602 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2603 {
2604         /* kvm common code refers to this, but never calls it */
2605         BUG();
2606         return 0;
2607 }
2608
2609 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2610                                            struct kvm_one_reg *reg)
2611 {
2612         int r = -EINVAL;
2613
2614         switch (reg->id) {
2615         case KVM_REG_S390_TODPR:
2616                 r = put_user(vcpu->arch.sie_block->todpr,
2617                              (u32 __user *)reg->addr);
2618                 break;
2619         case KVM_REG_S390_EPOCHDIFF:
2620                 r = put_user(vcpu->arch.sie_block->epoch,
2621                              (u64 __user *)reg->addr);
2622                 break;
2623         case KVM_REG_S390_CPU_TIMER:
2624                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2625                              (u64 __user *)reg->addr);
2626                 break;
2627         case KVM_REG_S390_CLOCK_COMP:
2628                 r = put_user(vcpu->arch.sie_block->ckc,
2629                              (u64 __user *)reg->addr);
2630                 break;
2631         case KVM_REG_S390_PFTOKEN:
2632                 r = put_user(vcpu->arch.pfault_token,
2633                              (u64 __user *)reg->addr);
2634                 break;
2635         case KVM_REG_S390_PFCOMPARE:
2636                 r = put_user(vcpu->arch.pfault_compare,
2637                              (u64 __user *)reg->addr);
2638                 break;
2639         case KVM_REG_S390_PFSELECT:
2640                 r = put_user(vcpu->arch.pfault_select,
2641                              (u64 __user *)reg->addr);
2642                 break;
2643         case KVM_REG_S390_PP:
2644                 r = put_user(vcpu->arch.sie_block->pp,
2645                              (u64 __user *)reg->addr);
2646                 break;
2647         case KVM_REG_S390_GBEA:
2648                 r = put_user(vcpu->arch.sie_block->gbea,
2649                              (u64 __user *)reg->addr);
2650                 break;
2651         default:
2652                 break;
2653         }
2654
2655         return r;
2656 }
2657
2658 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2659                                            struct kvm_one_reg *reg)
2660 {
2661         int r = -EINVAL;
2662         __u64 val;
2663
2664         switch (reg->id) {
2665         case KVM_REG_S390_TODPR:
2666                 r = get_user(vcpu->arch.sie_block->todpr,
2667                              (u32 __user *)reg->addr);
2668                 break;
2669         case KVM_REG_S390_EPOCHDIFF:
2670                 r = get_user(vcpu->arch.sie_block->epoch,
2671                              (u64 __user *)reg->addr);
2672                 break;
2673         case KVM_REG_S390_CPU_TIMER:
2674                 r = get_user(val, (u64 __user *)reg->addr);
2675                 if (!r)
2676                         kvm_s390_set_cpu_timer(vcpu, val);
2677                 break;
2678         case KVM_REG_S390_CLOCK_COMP:
2679                 r = get_user(vcpu->arch.sie_block->ckc,
2680                              (u64 __user *)reg->addr);
2681                 break;
2682         case KVM_REG_S390_PFTOKEN:
2683                 r = get_user(vcpu->arch.pfault_token,
2684                              (u64 __user *)reg->addr);
2685                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2686                         kvm_clear_async_pf_completion_queue(vcpu);
2687                 break;
2688         case KVM_REG_S390_PFCOMPARE:
2689                 r = get_user(vcpu->arch.pfault_compare,
2690                              (u64 __user *)reg->addr);
2691                 break;
2692         case KVM_REG_S390_PFSELECT:
2693                 r = get_user(vcpu->arch.pfault_select,
2694                              (u64 __user *)reg->addr);
2695                 break;
2696         case KVM_REG_S390_PP:
2697                 r = get_user(vcpu->arch.sie_block->pp,
2698                              (u64 __user *)reg->addr);
2699                 break;
2700         case KVM_REG_S390_GBEA:
2701                 r = get_user(vcpu->arch.sie_block->gbea,
2702                              (u64 __user *)reg->addr);
2703                 break;
2704         default:
2705                 break;
2706         }
2707
2708         return r;
2709 }
2710
2711 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2712 {
2713         kvm_s390_vcpu_initial_reset(vcpu);
2714         return 0;
2715 }
2716
2717 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2718 {
2719         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2720         return 0;
2721 }
2722
2723 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2724 {
2725         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2726         return 0;
2727 }
2728
2729 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2730                                   struct kvm_sregs *sregs)
2731 {
2732         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2733         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2734         return 0;
2735 }
2736
2737 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2738                                   struct kvm_sregs *sregs)
2739 {
2740         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2741         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2742         return 0;
2743 }
2744
2745 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2746 {
2747         if (test_fp_ctl(fpu->fpc))
2748                 return -EINVAL;
2749         vcpu->run->s.regs.fpc = fpu->fpc;
2750         if (MACHINE_HAS_VX)
2751                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2752                                  (freg_t *) fpu->fprs);
2753         else
2754                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2755         return 0;
2756 }
2757
2758 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2759 {
2760         /* make sure we have the latest values */
2761         save_fpu_regs();
2762         if (MACHINE_HAS_VX)
2763                 convert_vx_to_fp((freg_t *) fpu->fprs,
2764                                  (__vector128 *) vcpu->run->s.regs.vrs);
2765         else
2766                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2767         fpu->fpc = vcpu->run->s.regs.fpc;
2768         return 0;
2769 }
2770
2771 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2772 {
2773         int rc = 0;
2774
2775         if (!is_vcpu_stopped(vcpu)) {
2776                 rc = -EBUSY;
2777         } else {
2778                 vcpu->run->psw_mask = psw.mask;
2779                 vcpu->run->psw_addr = psw.addr;
2780         }
2781         return rc;
2782 }
2783
2784 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2785                                   struct kvm_translation *tr)
2786 {
2787         return -EINVAL; /* not implemented yet */
2788 }
2789
2790 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2791                               KVM_GUESTDBG_USE_HW_BP | \
2792                               KVM_GUESTDBG_ENABLE)
2793
2794 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2795                                         struct kvm_guest_debug *dbg)
2796 {
2797         int rc = 0;
2798
2799         vcpu->guest_debug = 0;
2800         kvm_s390_clear_bp_data(vcpu);
2801
2802         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2803                 return -EINVAL;
2804         if (!sclp.has_gpere)
2805                 return -EINVAL;
2806
2807         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2808                 vcpu->guest_debug = dbg->control;
2809                 /* enforce guest PER */
2810                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2811
2812                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2813                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2814         } else {
2815                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2816                 vcpu->arch.guestdbg.last_bp = 0;
2817         }
2818
2819         if (rc) {
2820                 vcpu->guest_debug = 0;
2821                 kvm_s390_clear_bp_data(vcpu);
2822                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2823         }
2824
2825         return rc;
2826 }
2827
2828 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2829                                     struct kvm_mp_state *mp_state)
2830 {
2831         /* CHECK_STOP and LOAD are not supported yet */
2832         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2833                                        KVM_MP_STATE_OPERATING;
2834 }
2835
2836 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2837                                     struct kvm_mp_state *mp_state)
2838 {
2839         int rc = 0;
2840
2841         /* user space knows about this interface - let it control the state */
2842         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2843
2844         switch (mp_state->mp_state) {
2845         case KVM_MP_STATE_STOPPED:
2846                 kvm_s390_vcpu_stop(vcpu);
2847                 break;
2848         case KVM_MP_STATE_OPERATING:
2849                 kvm_s390_vcpu_start(vcpu);
2850                 break;
2851         case KVM_MP_STATE_LOAD:
2852         case KVM_MP_STATE_CHECK_STOP:
2853                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2854         default:
2855                 rc = -ENXIO;
2856         }
2857
2858         return rc;
2859 }
2860
2861 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2862 {
2863         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2864 }
2865
2866 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2867 {
2868 retry:
2869         kvm_s390_vcpu_request_handled(vcpu);
2870         if (!kvm_request_pending(vcpu))
2871                 return 0;
2872         /*
2873          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2874          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2875          * This ensures that the ipte instruction for this request has
2876          * already finished. We might race against a second unmapper that
2877          * wants to set the blocking bit. Let's just retry the request loop.
2878          */
2879         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2880                 int rc;
2881                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2882                                           kvm_s390_get_prefix(vcpu),
2883                                           PAGE_SIZE * 2, PROT_WRITE);
2884                 if (rc) {
2885                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2886                         return rc;
2887                 }
2888                 goto retry;
2889         }
2890
2891         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2892                 vcpu->arch.sie_block->ihcpu = 0xffff;
2893                 goto retry;
2894         }
2895
2896         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2897                 if (!ibs_enabled(vcpu)) {
2898                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2899                         atomic_or(CPUSTAT_IBS,
2900                                         &vcpu->arch.sie_block->cpuflags);
2901                 }
2902                 goto retry;
2903         }
2904
2905         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2906                 if (ibs_enabled(vcpu)) {
2907                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2908                         atomic_andnot(CPUSTAT_IBS,
2909                                           &vcpu->arch.sie_block->cpuflags);
2910                 }
2911                 goto retry;
2912         }
2913
2914         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2915                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2916                 goto retry;
2917         }
2918
2919         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2920                 /*
2921                  * Disable CMMA virtualization; we will emulate the ESSA
2922                  * instruction manually, in order to provide the additional
2923                  * functionality needed for live migration.
2924                  */
2925                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2926                 goto retry;
2927         }
2928
2929         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2930                 /*
2931                  * Re-enable CMMA virtualization if CMMA is available and
2932                  * was used.
2933                  */
2934                 if (vcpu->kvm->arch.use_cmma &&
2935                     vcpu->kvm->mm->context.use_cmma)
2936                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2937                 goto retry;
2938         }
2939
2940         /* nothing to do, just clear the request */
2941         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2942
2943         return 0;
2944 }
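/*
 * For orientation, a hedged sketch of the producer side of the request loop
 * above: a request bit is raised against a vcpu and the vcpu is forced out
 * of SIE, so that the next vcpu_pre_run() lands in kvm_s390_handle_requests()
 * before the guest is re-entered, e.g.
 *
 *	kvm_s390_sync_request(KVM_REQ_START_MIGRATION, vcpu);
 *
 * which combines kvm_make_request() with a kick out of guest context.
 */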
2945
2946 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2947                                  const struct kvm_s390_vm_tod_clock *gtod)
2948 {
2949         struct kvm_vcpu *vcpu;
2950         struct kvm_s390_tod_clock_ext htod;
2951         int i;
2952
2953         mutex_lock(&kvm->lock);
2954         preempt_disable();
2955
2956         get_tod_clock_ext((char *)&htod);
2957
2958         kvm->arch.epoch = gtod->tod - htod.tod;
2959         kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2960
2961         if (kvm->arch.epoch > gtod->tod)
2962                 kvm->arch.epdx -= 1;
2963
2964         kvm_s390_vcpu_block_all(kvm);
2965         kvm_for_each_vcpu(i, vcpu, kvm) {
2966                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2967                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
2968         }
2969
2970         kvm_s390_vcpu_unblock_all(kvm);
2971         preempt_enable();
2972         mutex_unlock(&kvm->lock);
2973 }
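/*
 * Worked example for the borrow above (values are illustrative only). The
 * guest view of the clock is host TOD + epoch, computed in two parts with
 * epdx holding the epoch-index extension:
 *
 *	htod.tod  = 0xf000000000000000		host clock
 *	gtod->tod = 0x1000000000000000		requested guest clock
 *	epoch     = 0x2000000000000000		gtod->tod - htod.tod, mod 2^64
 *
 * The subtraction wrapped (epoch > gtod->tod), i.e. the requested guest
 * clock lies behind the host clock, so one is borrowed from epdx.
 */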
2974
2975 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2976 {
2977         struct kvm_vcpu *vcpu;
2978         int i;
2979
2980         mutex_lock(&kvm->lock);
2981         preempt_disable();
2982         kvm->arch.epoch = tod - get_tod_clock();
2983         kvm_s390_vcpu_block_all(kvm);
2984         kvm_for_each_vcpu(i, vcpu, kvm)
2985                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2986         kvm_s390_vcpu_unblock_all(kvm);
2987         preempt_enable();
2988         mutex_unlock(&kvm->lock);
2989 }
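/*
 * This is the plain 64-bit variant without epoch-index handling; the
 * relation it maintains is simply (mod 2^64)
 *
 *	guest_tod = host_tod + kvm->arch.epoch;
 *
 * Callers that only deal with a 64-bit TOD value (e.g. the SET CLOCK
 * intercept handler) use this; epoch-index-aware callers use
 * kvm_s390_set_tod_clock_ext() above.
 */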
2990
2991 /**
2992  * kvm_arch_fault_in_page - fault-in guest page if necessary
2993  * @vcpu: The corresponding virtual cpu
2994  * @gpa: Guest physical address
2995  * @writable: Whether the page should be writable or not
2996  *
2997  * Make sure that a guest page has been faulted-in on the host.
2998  *
2999  * Return: Zero on success, negative error code otherwise.
3000  */
3001 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3002 {
3003         return gmap_fault(vcpu->arch.gmap, gpa,
3004                           writable ? FAULT_FLAG_WRITE : 0);
3005 }
3006
3007 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3008                                       unsigned long token)
3009 {
3010         struct kvm_s390_interrupt inti;
3011         struct kvm_s390_irq irq;
3012
3013         if (start_token) {
3014                 irq.u.ext.ext_params2 = token;
3015                 irq.type = KVM_S390_INT_PFAULT_INIT;
3016                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3017         } else {
3018                 inti.type = KVM_S390_INT_PFAULT_DONE;
3019                 inti.parm64 = token;
3020                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3021         }
3022 }
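/*
 * Pfault handshake in short, as implemented above: PFAULT_INIT tells the
 * guest "this task would block on a host page fault, schedule something
 * else" and is injected as a local interrupt on the faulting vcpu, while
 * PFAULT_DONE signals completion for the same token and is injected as a
 * floating interrupt on the VM, so it can be posted even while the
 * original vcpu is not running.
 */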
3023
3024 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3025                                      struct kvm_async_pf *work)
3026 {
3027         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3028         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3029 }
3030
3031 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3032                                  struct kvm_async_pf *work)
3033 {
3034         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3035         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3036 }
3037
3038 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3039                                struct kvm_async_pf *work)
3040 {
3041         /* s390 will always inject the page directly */
3042 }
3043
3044 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3045 {
3046         /*
3047          * s390 will always inject the page directly,
3048          * but we still want kvm_check_async_pf_completion() to clean up
3049          */
3050         return true;
3051 }
3052
3053 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3054 {
3055         hva_t hva;
3056         struct kvm_arch_async_pf arch;
3057         int rc;
3058
3059         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3060                 return 0;
3061         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3062             vcpu->arch.pfault_compare)
3063                 return 0;
3064         if (psw_extint_disabled(vcpu))
3065                 return 0;
3066         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3067                 return 0;
3068         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3069                 return 0;
3070         if (!vcpu->arch.gmap->pfault_enabled)
3071                 return 0;
3072
3073         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3074         hva += current->thread.gmap_addr & ~PAGE_MASK;
3075         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3076                 return 0;
3077
3078         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3079         return rc;
3080 }
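/*
 * Hedged reading of the guards above: a pfault token of
 * KVM_S390_PFAULT_TOKEN_INVALID means the guest never enabled the
 * mechanism; the PSW select/compare pair and psw_extint_disabled() verify
 * that the guest is in a state where it asked for and can take the
 * notification; and 0x200ul is the service-signal submask in CR0 (bit 54),
 * under which the pfault external interrupt (code 0x2603) is presented.
 * If any guard fails we return 0 and the caller falls back to a
 * synchronous fault-in.
 */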
3081
3082 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3083 {
3084         int rc, cpuflags;
3085
3086         /*
3087          * On s390, notifications for arriving pages are delivered directly
3088          * to the guest, but the housekeeping for completed pfaults is
3089          * handled outside the worker.
3090          */
3091         kvm_check_async_pf_completion(vcpu);
3092
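        /*
         * Background (hedged, from the sie64a entry code): only guest
         * r0-r13 are loaded into the hardware registers around SIE; the
         * SIE instruction itself takes guest r14/r15 from gg14/gg15 in
         * the control block, since the host needs r14/r15 for return
         * address and stack. Hence the explicit sync here and the
         * corresponding read-back in vcpu_post_run().
         */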
3093         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3094         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3095
3096         if (need_resched())
3097                 schedule();
3098
3099         if (test_cpu_flag(CIF_MCCK_PENDING))
3100                 s390_handle_mcck();
3101
3102         if (!kvm_is_ucontrol(vcpu->kvm)) {
3103                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3104                 if (rc)
3105                         return rc;
3106         }
3107
3108         rc = kvm_s390_handle_requests(vcpu);
3109         if (rc)
3110                 return rc;
3111
3112         if (guestdbg_enabled(vcpu)) {
3113                 kvm_s390_backup_guest_per_regs(vcpu);
3114                 kvm_s390_patch_guest_per_regs(vcpu);
3115         }
3116
3117         vcpu->arch.sie_block->icptcode = 0;
3118         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3119         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3120         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3121
3122         return 0;
3123 }
3124
3125 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3126 {
3127         struct kvm_s390_pgm_info pgm_info = {
3128                 .code = PGM_ADDRESSING,
3129         };
3130         u8 opcode, ilen;
3131         int rc;
3132
3133         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3134         trace_kvm_s390_sie_fault(vcpu);
3135
3136         /*
3137          * We want to inject an addressing exception, which is defined as a
3138          * suppressing or terminating exception. However, since we got here
3139          * via a DAT access exception, the PSW still points to the faulting
3140          * instruction, as DAT exceptions are nullifying. So we have to look
3141          * up the current opcode to get the length of the instruction to be
3142          * able to forward the PSW.
3143          */
3144         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3145         ilen = insn_length(opcode);
3146         if (rc < 0) {
3147                 return rc;
3148         } else if (rc) {
3149                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3150                  * Forward by an arbitrary ilc; the injection will take care
3151                  * of nullification if necessary.
3152                  */
3153                 pgm_info = vcpu->arch.pgm;
3154                 ilen = 4;
3155         }
3156         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3157         kvm_s390_forward_psw(vcpu, ilen);
3158         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3159 }
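/*
 * For reference, insn_length() needs only the first opcode byte: on
 * z/Architecture the two leftmost opcode bits encode the instruction
 * length (00 -> 2 bytes, 01/10 -> 4 bytes, 11 -> 6 bytes), e.g.
 *
 *	0x07 (BCR)	-> 00xxxxxx -> 2 bytes
 *	0xb2 (S format)	-> 10xxxxxx -> 4 bytes
 *	0xe3 (LG et al)	-> 11xxxxxx -> 6 bytes
 *
 * which is why fetching a single byte at gpsw.addr is sufficient here.
 */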
3160
3161 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3162 {
3163         struct mcck_volatile_info *mcck_info;
3164         struct sie_page *sie_page;
3165
3166         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3167                    vcpu->arch.sie_block->icptcode);
3168         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3169
3170         if (guestdbg_enabled(vcpu))
3171                 kvm_s390_restore_guest_per_regs(vcpu);
3172
3173         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3174         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3175
3176         if (exit_reason == -EINTR) {
3177                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3178                 sie_page = container_of(vcpu->arch.sie_block,
3179                                         struct sie_page, sie_block);
3180                 mcck_info = &sie_page->mcck_info;
3181                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3182                 return 0;
3183         }
3184
3185         if (vcpu->arch.sie_block->icptcode > 0) {
3186                 int rc = kvm_handle_sie_intercept(vcpu);
3187
3188                 if (rc != -EOPNOTSUPP)
3189                         return rc;
3190                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3191                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3192                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3193                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3194                 return -EREMOTE;
3195         } else if (exit_reason != -EFAULT) {
3196                 vcpu->stat.exit_null++;
3197                 return 0;
3198         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3199                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3200                 vcpu->run->s390_ucontrol.trans_exc_code =
3201                                                 current->thread.gmap_addr;
3202                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3203                 return -EREMOTE;
3204         } else if (current->thread.gmap_pfault) {
3205                 trace_kvm_s390_major_guest_pfault(vcpu);
3206                 current->thread.gmap_pfault = 0;
3207                 if (kvm_arch_setup_async_pf(vcpu))
3208                         return 0;
3209                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3210         }
3211         return vcpu_post_run_fault_in_sie(vcpu);
3212 }
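/*
 * Return-value contract of vcpu_post_run(), as consumed by __vcpu_run()
 * below: 0 means "handled, re-enter the guest", a negative error aborts
 * the run loop, and -EREMOTE means the exit has been packaged into
 * vcpu->run for userspace to handle. exit_reason is (roughly) what sie64a
 * returned: >= 0 for a normal exit with the reason in icptcode, -EINTR
 * for a machine check during SIE, -EFAULT for a host fault in guest
 * context.
 */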
3213
3214 static int __vcpu_run(struct kvm_vcpu *vcpu)
3215 {
3216         int rc, exit_reason;
3217
3218         /*
3219          * We try to hold kvm->srcu during most of vcpu_run (except when
3220          * running the guest), so that memslots (and other stuff) are protected
3221          */
3222         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3223
3224         do {
3225                 rc = vcpu_pre_run(vcpu);
3226                 if (rc)
3227                         break;
3228
3229                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3230                 /*
3231                  * As PF_VCPU will be used in the fault handler, there must
3232                  * be no uaccess between guest_enter and guest_exit.
3233                  */
3234                 local_irq_disable();
3235                 guest_enter_irqoff();
3236                 __disable_cpu_timer_accounting(vcpu);
3237                 local_irq_enable();
3238                 exit_reason = sie64a(vcpu->arch.sie_block,
3239                                      vcpu->run->s.regs.gprs);
3240                 local_irq_disable();
3241                 __enable_cpu_timer_accounting(vcpu);
3242                 guest_exit_irqoff();
3243                 local_irq_enable();
3244                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3245
3246                 rc = vcpu_post_run(vcpu, exit_reason);