arch/s390/kvm/kvm-s390.c
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include "kvm-s390.h"
47 #include "gaccess.h"
48
49 #define KMSG_COMPONENT "kvm-s390"
50 #undef pr_fmt
51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52
53 #define CREATE_TRACE_POINTS
54 #include "trace.h"
55 #include "trace-s390.h"
56
57 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
58 #define LOCAL_IRQS 32
59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60                            (KVM_MAX_VCPUS + LOCAL_IRQS))
61
62 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
63
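/*
 * debugfs statistics: each entry maps a debugfs file name to the offset of
 * the corresponding counter in struct kvm_vcpu (via the VCPU_STAT() helper
 * above), so the common KVM debugfs code can expose these per-vcpu counters.
 */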
64 struct kvm_stats_debugfs_item debugfs_entries[] = {
65         { "userspace_handled", VCPU_STAT(exit_userspace) },
66         { "exit_null", VCPU_STAT(exit_null) },
67         { "exit_validity", VCPU_STAT(exit_validity) },
68         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
69         { "exit_external_request", VCPU_STAT(exit_external_request) },
70         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
71         { "exit_instruction", VCPU_STAT(exit_instruction) },
72         { "exit_pei", VCPU_STAT(exit_pei) },
73         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
74         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
75         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
76         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
77         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
78         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
79         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
85         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
86         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
87         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
88         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
89         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
90         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
91         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
92         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
93         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
94         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
95         { "instruction_spx", VCPU_STAT(instruction_spx) },
96         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
97         { "instruction_stap", VCPU_STAT(instruction_stap) },
98         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
99         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
100         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
101         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
102         { "instruction_essa", VCPU_STAT(instruction_essa) },
103         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
104         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
105         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
106         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
107         { "instruction_sie", VCPU_STAT(instruction_sie) },
108         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
109         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
110         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
111         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
112         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
113         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
114         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
115         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
116         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
117         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
118         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
119         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
120         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
121         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
122         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
123         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
124         { "diagnose_10", VCPU_STAT(diagnose_10) },
125         { "diagnose_44", VCPU_STAT(diagnose_44) },
126         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
127         { "diagnose_258", VCPU_STAT(diagnose_258) },
128         { "diagnose_308", VCPU_STAT(diagnose_308) },
129         { "diagnose_500", VCPU_STAT(diagnose_500) },
130         { NULL }
131 };
132
133 /* allow nested virtualization in KVM (if enabled by user space) */
134 static int nested;
135 module_param(nested, int, S_IRUGO);
136 MODULE_PARM_DESC(nested, "Nested virtualization support");
137
138 /* upper facilities limit for kvm */
139 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
140
141 unsigned long kvm_s390_fac_list_mask_size(void)
142 {
143         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
144         return ARRAY_SIZE(kvm_s390_fac_list_mask);
145 }
146
147 /* available cpu features supported by kvm */
148 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
149 /* available subfunctions indicated via query / "test bit" */
150 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
151
152 static struct gmap_notifier gmap_notifier;
153 static struct gmap_notifier vsie_gmap_notifier;
154 debug_info_t *kvm_s390_dbf;
155
156 /* Section: not file related */
157 int kvm_arch_hardware_enable(void)
158 {
159         /* every s390 is virtualization enabled ;-) */
160         return 0;
161 }
162
163 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
164                               unsigned long end);
165
166 /*
167  * This callback is executed during stop_machine(). All CPUs are therefore
168  * temporarily stopped. In order not to change guest behavior, we have to
169  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
170  * so a CPU won't be stopped while calculating with the epoch.
171  */
172 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
173                           void *v)
174 {
175         struct kvm *kvm;
176         struct kvm_vcpu *vcpu;
177         int i;
178         unsigned long long *delta = v;
179
180         list_for_each_entry(kvm, &vm_list, vm_list) {
181                 kvm->arch.epoch -= *delta;
182                 kvm_for_each_vcpu(i, vcpu, kvm) {
183                         vcpu->arch.sie_block->epoch -= *delta;
184                         if (vcpu->arch.cputm_enabled)
185                                 vcpu->arch.cputm_start += *delta;
186                         if (vcpu->arch.vsie_block)
187                                 vcpu->arch.vsie_block->epoch -= *delta;
188                 }
189         }
190         return NOTIFY_OK;
191 }
192
193 static struct notifier_block kvm_clock_notifier = {
194         .notifier_call = kvm_clock_sync,
195 };
196
197 int kvm_arch_hardware_setup(void)
198 {
199         gmap_notifier.notifier_call = kvm_gmap_notifier;
200         gmap_register_pte_notifier(&gmap_notifier);
201         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
202         gmap_register_pte_notifier(&vsie_gmap_notifier);
203         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
204                                        &kvm_clock_notifier);
205         return 0;
206 }
207
208 void kvm_arch_hardware_unsetup(void)
209 {
210         gmap_unregister_pte_notifier(&gmap_notifier);
211         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
212         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
213                                          &kvm_clock_notifier);
214 }
215
216 static void allow_cpu_feat(unsigned long nr)
217 {
218         set_bit_inv(nr, kvm_s390_available_cpu_feat);
219 }
220
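/*
 * Probe a single PERFORM LOCKED OPERATION function code. Setting the
 * "test bit" (0x100) in the function code turns PLO into a pure query:
 * condition code 0 means the function denoted by nr is available.
 */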
221 static inline int plo_test_bit(unsigned char nr)
222 {
223         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
224         int cc;
225
226         asm volatile(
227                 /* Parameter registers are ignored for "test bit" */
228                 "       plo     0,0,0,0(0)\n"
229                 "       ipm     %0\n"
230                 "       srl     %0,28\n"
231                 : "=d" (cc)
232                 : "d" (r0)
233                 : "cc");
234         return cc == 0;
235 }
236
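/*
 * Probe the host for the CPU features and the crypto/PLO/PTFF subfunctions
 * that KVM can offer to guests. Called once from kvm_arch_init(); the
 * results are later reported via the KVM_S390_VM_CPU_MACHINE* attributes.
 */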
237 static void kvm_s390_cpu_feat_init(void)
238 {
239         int i;
240
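        /*
         * Try all 256 possible PLO function codes and record each available
         * one in the query-format mask, most-significant bit first within
         * each byte.
         */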
241         for (i = 0; i < 256; ++i) {
242                 if (plo_test_bit(i))
243                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
244         }
245
246         if (test_facility(28)) /* TOD-clock steering */
247                 ptff(kvm_s390_available_subfunc.ptff,
248                      sizeof(kvm_s390_available_subfunc.ptff),
249                      PTFF_QAF);
250
251         if (test_facility(17)) { /* MSA */
252                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
253                               kvm_s390_available_subfunc.kmac);
254                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
255                               kvm_s390_available_subfunc.kmc);
256                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
257                               kvm_s390_available_subfunc.km);
258                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
259                               kvm_s390_available_subfunc.kimd);
260                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
261                               kvm_s390_available_subfunc.klmd);
262         }
263         if (test_facility(76)) /* MSA3 */
264                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
265                               kvm_s390_available_subfunc.pckmo);
266         if (test_facility(77)) { /* MSA4 */
267                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
268                               kvm_s390_available_subfunc.kmctr);
269                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
270                               kvm_s390_available_subfunc.kmf);
271                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
272                               kvm_s390_available_subfunc.kmo);
273                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
274                               kvm_s390_available_subfunc.pcc);
275         }
276         if (test_facility(57)) /* MSA5 */
277                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
278                               kvm_s390_available_subfunc.ppno);
279
280         if (test_facility(146)) /* MSA8 */
281                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
282                               kvm_s390_available_subfunc.kma);
283
284         if (MACHINE_HAS_ESOP)
285                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
286         /*
287          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
288          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
289          */
290         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
291             !test_facility(3) || !nested)
292                 return;
293         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
294         if (sclp.has_64bscao)
295                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
296         if (sclp.has_siif)
297                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
298         if (sclp.has_gpere)
299                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
300         if (sclp.has_gsls)
301                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
302         if (sclp.has_ib)
303                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
304         if (sclp.has_cei)
305                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
306         if (sclp.has_ibs)
307                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
308         if (sclp.has_kss)
309                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
310         /*
311          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
312          * all skey handling functions read/set the skey from the PGSTE
313          * instead of the real storage key.
314          *
315          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
316          * pages being detected as preserved although they are resident.
317          *
318          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
319          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
320          *
321          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
322          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
323          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
324          *
325          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
326          * cannot easily shadow the SCA because of the ipte lock.
327          */
328 }
329
330 int kvm_arch_init(void *opaque)
331 {
332         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
333         if (!kvm_s390_dbf)
334                 return -ENOMEM;
335
336         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
337                 debug_unregister(kvm_s390_dbf);
338                 return -ENOMEM;
339         }
340
341         kvm_s390_cpu_feat_init();
342
343         /* Register floating interrupt controller interface. */
344         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
345 }
346
347 void kvm_arch_exit(void)
348 {
349         debug_unregister(kvm_s390_dbf);
350 }
351
352 /* Section: device related */
353 long kvm_arch_dev_ioctl(struct file *filp,
354                         unsigned int ioctl, unsigned long arg)
355 {
356         if (ioctl == KVM_S390_ENABLE_SIE)
357                 return s390_enable_sie();
358         return -EINVAL;
359 }
360
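/*
 * Report which KVM capabilities are available on s390. For most extensions
 * the answer is a plain 0/1; for some (e.g. KVM_CAP_MAX_VCPUS or
 * KVM_CAP_S390_MEM_OP) a numeric limit is returned instead.
 */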
361 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
362 {
363         int r;
364
365         switch (ext) {
366         case KVM_CAP_S390_PSW:
367         case KVM_CAP_S390_GMAP:
368         case KVM_CAP_SYNC_MMU:
369 #ifdef CONFIG_KVM_S390_UCONTROL
370         case KVM_CAP_S390_UCONTROL:
371 #endif
372         case KVM_CAP_ASYNC_PF:
373         case KVM_CAP_SYNC_REGS:
374         case KVM_CAP_ONE_REG:
375         case KVM_CAP_ENABLE_CAP:
376         case KVM_CAP_S390_CSS_SUPPORT:
377         case KVM_CAP_IOEVENTFD:
378         case KVM_CAP_DEVICE_CTRL:
379         case KVM_CAP_ENABLE_CAP_VM:
380         case KVM_CAP_S390_IRQCHIP:
381         case KVM_CAP_VM_ATTRIBUTES:
382         case KVM_CAP_MP_STATE:
383         case KVM_CAP_IMMEDIATE_EXIT:
384         case KVM_CAP_S390_INJECT_IRQ:
385         case KVM_CAP_S390_USER_SIGP:
386         case KVM_CAP_S390_USER_STSI:
387         case KVM_CAP_S390_SKEYS:
388         case KVM_CAP_S390_IRQ_STATE:
389         case KVM_CAP_S390_USER_INSTR0:
390         case KVM_CAP_S390_CMMA_MIGRATION:
391         case KVM_CAP_S390_AIS:
392                 r = 1;
393                 break;
394         case KVM_CAP_S390_MEM_OP:
395                 r = MEM_OP_MAX_SIZE;
396                 break;
397         case KVM_CAP_NR_VCPUS:
398         case KVM_CAP_MAX_VCPUS:
399                 r = KVM_S390_BSCA_CPU_SLOTS;
400                 if (!kvm_s390_use_sca_entries())
401                         r = KVM_MAX_VCPUS;
402                 else if (sclp.has_esca && sclp.has_64bscao)
403                         r = KVM_S390_ESCA_CPU_SLOTS;
404                 break;
405         case KVM_CAP_NR_MEMSLOTS:
406                 r = KVM_USER_MEM_SLOTS;
407                 break;
408         case KVM_CAP_S390_COW:
409                 r = MACHINE_HAS_ESOP;
410                 break;
411         case KVM_CAP_S390_VECTOR_REGISTERS:
412                 r = MACHINE_HAS_VX;
413                 break;
414         case KVM_CAP_S390_RI:
415                 r = test_facility(64);
416                 break;
417         case KVM_CAP_S390_GS:
418                 r = test_facility(133);
419                 break;
420         default:
421                 r = 0;
422         }
423         return r;
424 }
425
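/*
 * Transfer the dirty state tracked in the host page tables into the
 * memslot's dirty bitmap, one guest page at a time. The loop can be long,
 * so reschedule regularly and bail out on fatal signals.
 */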
426 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
427                                         struct kvm_memory_slot *memslot)
428 {
429         gfn_t cur_gfn, last_gfn;
430         unsigned long address;
431         struct gmap *gmap = kvm->arch.gmap;
432
433         /* Loop over all guest pages */
434         last_gfn = memslot->base_gfn + memslot->npages;
435         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
436                 address = gfn_to_hva_memslot(memslot, cur_gfn);
437
438                 if (test_and_clear_guest_dirty(gmap->mm, address))
439                         mark_page_dirty(kvm, cur_gfn);
440                 if (fatal_signal_pending(current))
441                         return;
442                 cond_resched();
443         }
444 }
445
446 /* Section: vm related */
447 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
448
449 /*
450  * Get (and clear) the dirty memory log for a memory slot.
451  */
452 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
453                                struct kvm_dirty_log *log)
454 {
455         int r;
456         unsigned long n;
457         struct kvm_memslots *slots;
458         struct kvm_memory_slot *memslot;
459         int is_dirty = 0;
460
461         if (kvm_is_ucontrol(kvm))
462                 return -EINVAL;
463
464         mutex_lock(&kvm->slots_lock);
465
466         r = -EINVAL;
467         if (log->slot >= KVM_USER_MEM_SLOTS)
468                 goto out;
469
470         slots = kvm_memslots(kvm);
471         memslot = id_to_memslot(slots, log->slot);
472         r = -ENOENT;
473         if (!memslot->dirty_bitmap)
474                 goto out;
475
476         kvm_s390_sync_dirty_log(kvm, memslot);
477         r = kvm_get_dirty_log(kvm, log, &is_dirty);
478         if (r)
479                 goto out;
480
481         /* Clear the dirty log */
482         if (is_dirty) {
483                 n = kvm_dirty_bitmap_bytes(memslot);
484                 memset(memslot->dirty_bitmap, 0, n);
485         }
486         r = 0;
487 out:
488         mutex_unlock(&kvm->slots_lock);
489         return r;
490 }
491
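/*
 * Ask every VCPU to start intercepting operation exceptions, e.g. after
 * user space has enabled KVM_CAP_S390_USER_INSTR0.
 */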
492 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
493 {
494         unsigned int i;
495         struct kvm_vcpu *vcpu;
496
497         kvm_for_each_vcpu(i, vcpu, kvm) {
498                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
499         }
500 }
501
502 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
503 {
504         int r;
505
506         if (cap->flags)
507                 return -EINVAL;
508
509         switch (cap->cap) {
510         case KVM_CAP_S390_IRQCHIP:
511                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
512                 kvm->arch.use_irqchip = 1;
513                 r = 0;
514                 break;
515         case KVM_CAP_S390_USER_SIGP:
516                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
517                 kvm->arch.user_sigp = 1;
518                 r = 0;
519                 break;
520         case KVM_CAP_S390_VECTOR_REGISTERS:
521                 mutex_lock(&kvm->lock);
522                 if (kvm->created_vcpus) {
523                         r = -EBUSY;
524                 } else if (MACHINE_HAS_VX) {
525                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
526                         set_kvm_facility(kvm->arch.model.fac_list, 129);
527                         if (test_facility(134)) {
528                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
529                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
530                         }
531                         if (test_facility(135)) {
532                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
533                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
534                         }
535                         r = 0;
536                 } else
537                         r = -EINVAL;
538                 mutex_unlock(&kvm->lock);
539                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
540                          r ? "(not available)" : "(success)");
541                 break;
542         case KVM_CAP_S390_RI:
543                 r = -EINVAL;
544                 mutex_lock(&kvm->lock);
545                 if (kvm->created_vcpus) {
546                         r = -EBUSY;
547                 } else if (test_facility(64)) {
548                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
549                         set_kvm_facility(kvm->arch.model.fac_list, 64);
550                         r = 0;
551                 }
552                 mutex_unlock(&kvm->lock);
553                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
554                          r ? "(not available)" : "(success)");
555                 break;
556         case KVM_CAP_S390_AIS:
557                 mutex_lock(&kvm->lock);
558                 if (kvm->created_vcpus) {
559                         r = -EBUSY;
560                 } else {
561                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
562                         set_kvm_facility(kvm->arch.model.fac_list, 72);
563                         r = 0;
564                 }
565                 mutex_unlock(&kvm->lock);
566                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
567                          r ? "(not available)" : "(success)");
568                 break;
569         case KVM_CAP_S390_GS:
570                 r = -EINVAL;
571                 mutex_lock(&kvm->lock);
572                 if (atomic_read(&kvm->online_vcpus)) {
573                         r = -EBUSY;
574                 } else if (test_facility(133)) {
575                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
576                         set_kvm_facility(kvm->arch.model.fac_list, 133);
577                         r = 0;
578                 }
579                 mutex_unlock(&kvm->lock);
580                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
581                          r ? "(not available)" : "(success)");
582                 break;
583         case KVM_CAP_S390_USER_STSI:
584                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
585                 kvm->arch.user_stsi = 1;
586                 r = 0;
587                 break;
588         case KVM_CAP_S390_USER_INSTR0:
589                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
590                 kvm->arch.user_instr0 = 1;
591                 icpt_operexc_on_all_vcpus(kvm);
592                 r = 0;
593                 break;
594         default:
595                 r = -EINVAL;
596                 break;
597         }
598         return r;
599 }
600
601 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
602 {
603         int ret;
604
605         switch (attr->attr) {
606         case KVM_S390_VM_MEM_LIMIT_SIZE:
607                 ret = 0;
608                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
609                          kvm->arch.mem_limit);
610                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
611                         ret = -EFAULT;
612                 break;
613         default:
614                 ret = -ENXIO;
615                 break;
616         }
617         return ret;
618 }
619
620 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
621 {
622         int ret;
623         unsigned int idx;
624         switch (attr->attr) {
625         case KVM_S390_VM_MEM_ENABLE_CMMA:
626                 ret = -ENXIO;
627                 if (!sclp.has_cmma)
628                         break;
629
630                 ret = -EBUSY;
631                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
632                 mutex_lock(&kvm->lock);
633                 if (!kvm->created_vcpus) {
634                         kvm->arch.use_cmma = 1;
635                         ret = 0;
636                 }
637                 mutex_unlock(&kvm->lock);
638                 break;
639         case KVM_S390_VM_MEM_CLR_CMMA:
640                 ret = -ENXIO;
641                 if (!sclp.has_cmma)
642                         break;
643                 ret = -EINVAL;
644                 if (!kvm->arch.use_cmma)
645                         break;
646
647                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
648                 mutex_lock(&kvm->lock);
649                 idx = srcu_read_lock(&kvm->srcu);
650                 s390_reset_cmma(kvm->arch.gmap->mm);
651                 srcu_read_unlock(&kvm->srcu, idx);
652                 mutex_unlock(&kvm->lock);
653                 ret = 0;
654                 break;
655         case KVM_S390_VM_MEM_LIMIT_SIZE: {
656                 unsigned long new_limit;
657
658                 if (kvm_is_ucontrol(kvm))
659                         return -EINVAL;
660
661                 if (get_user(new_limit, (u64 __user *)attr->addr))
662                         return -EFAULT;
663
664                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
665                     new_limit > kvm->arch.mem_limit)
666                         return -E2BIG;
667
668                 if (!new_limit)
669                         return -EINVAL;
670
671                 /* gmap_create takes last usable address */
672                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
673                         new_limit -= 1;
674
675                 ret = -EBUSY;
676                 mutex_lock(&kvm->lock);
677                 if (!kvm->created_vcpus) {
678                         /* gmap_create will round the limit up */
679                         struct gmap *new = gmap_create(current->mm, new_limit);
680
681                         if (!new) {
682                                 ret = -ENOMEM;
683                         } else {
684                                 gmap_remove(kvm->arch.gmap);
685                                 new->private = kvm;
686                                 kvm->arch.gmap = new;
687                                 ret = 0;
688                         }
689                 }
690                 mutex_unlock(&kvm->lock);
691                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
692                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
693                          (void *) kvm->arch.gmap->asce);
694                 break;
695         }
696         default:
697                 ret = -ENXIO;
698                 break;
699         }
700         return ret;
701 }
702
703 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
704
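/*
 * Toggle AES/DEA key wrapping for the whole VM: (re)generate or clear the
 * wrapping key masks in the CRYCB, then update every VCPU's crypto setup
 * and kick it out of SIE so the change takes effect immediately.
 */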
705 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
706 {
707         struct kvm_vcpu *vcpu;
708         int i;
709
710         if (!test_kvm_facility(kvm, 76))
711                 return -EINVAL;
712
713         mutex_lock(&kvm->lock);
714         switch (attr->attr) {
715         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
716                 get_random_bytes(
717                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
718                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
719                 kvm->arch.crypto.aes_kw = 1;
720                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
721                 break;
722         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
723                 get_random_bytes(
724                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
725                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
726                 kvm->arch.crypto.dea_kw = 1;
727                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
728                 break;
729         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
730                 kvm->arch.crypto.aes_kw = 0;
731                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
732                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
733                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
734                 break;
735         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
736                 kvm->arch.crypto.dea_kw = 0;
737                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
738                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
739                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
740                 break;
741         default:
742                 mutex_unlock(&kvm->lock);
743                 return -ENXIO;
744         }
745
746         kvm_for_each_vcpu(i, vcpu, kvm) {
747                 kvm_s390_vcpu_crypto_setup(vcpu);
748                 exit_sie(vcpu);
749         }
750         mutex_unlock(&kvm->lock);
751         return 0;
752 }
753
754 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
755 {
756         int cx;
757         struct kvm_vcpu *vcpu;
758
759         kvm_for_each_vcpu(cx, vcpu, kvm)
760                 kvm_s390_sync_request(req, vcpu);
761 }
762
763 /*
764  * Must be called with kvm->srcu held to avoid races on memslots, and with
765  * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
766  */
767 static int kvm_s390_vm_start_migration(struct kvm *kvm)
768 {
769         struct kvm_s390_migration_state *mgs;
770         struct kvm_memory_slot *ms;
771         /* should be the only one */
772         struct kvm_memslots *slots;
773         unsigned long ram_pages;
774         int slotnr;
775
776         /* migration mode already enabled */
777         if (kvm->arch.migration_state)
778                 return 0;
779
780         slots = kvm_memslots(kvm);
781         if (!slots || !slots->used_slots)
782                 return -EINVAL;
783
784         mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
785         if (!mgs)
786                 return -ENOMEM;
787         kvm->arch.migration_state = mgs;
788
789         if (kvm->arch.use_cmma) {
790                 /*
791                  * Get the last slot. They should be sorted by base_gfn, so the
792                  * last slot is also the one at the end of the address space.
793                  * We have verified above that at least one slot is present.
794                  */
795                 ms = slots->memslots + slots->used_slots - 1;
796                 /* round up so we only use full longs */
797                 ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
798                 /* allocate enough bytes to store all the bits */
799                 mgs->pgste_bitmap = vmalloc(ram_pages / 8);
800                 if (!mgs->pgste_bitmap) {
801                         kfree(mgs);
802                         kvm->arch.migration_state = NULL;
803                         return -ENOMEM;
804                 }
805
806                 mgs->bitmap_size = ram_pages;
807                 atomic64_set(&mgs->dirty_pages, ram_pages);
808                 /* mark all the pages in active slots as dirty */
809                 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
810                         ms = slots->memslots + slotnr;
811                         bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
812                 }
813
814                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
815         }
816         return 0;
817 }
818
819 /*
820  * Must be called with kvm->lock to avoid races with ourselves and
821  * kvm_s390_vm_start_migration.
822  */
823 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
824 {
825         struct kvm_s390_migration_state *mgs;
826
827         /* migration mode already disabled */
828         if (!kvm->arch.migration_state)
829                 return 0;
830         mgs = kvm->arch.migration_state;
831         kvm->arch.migration_state = NULL;
832
833         if (kvm->arch.use_cmma) {
834                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
835                 vfree(mgs->pgste_bitmap);
836         }
837         kfree(mgs);
838         return 0;
839 }
840
841 static int kvm_s390_vm_set_migration(struct kvm *kvm,
842                                      struct kvm_device_attr *attr)
843 {
844         int idx, res = -ENXIO;
845
846         mutex_lock(&kvm->lock);
847         switch (attr->attr) {
848         case KVM_S390_VM_MIGRATION_START:
849                 idx = srcu_read_lock(&kvm->srcu);
850                 res = kvm_s390_vm_start_migration(kvm);
851                 srcu_read_unlock(&kvm->srcu, idx);
852                 break;
853         case KVM_S390_VM_MIGRATION_STOP:
854                 res = kvm_s390_vm_stop_migration(kvm);
855                 break;
856         default:
857                 break;
858         }
859         mutex_unlock(&kvm->lock);
860
861         return res;
862 }
863
864 static int kvm_s390_vm_get_migration(struct kvm *kvm,
865                                      struct kvm_device_attr *attr)
866 {
867         u64 mig = (kvm->arch.migration_state != NULL);
868
869         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
870                 return -ENXIO;
871
872         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
873                 return -EFAULT;
874         return 0;
875 }
876
877 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
878 {
879         u8 gtod_high;
880
881         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
882                                            sizeof(gtod_high)))
883                 return -EFAULT;
884
885         if (gtod_high != 0)
886                 return -EINVAL;
887         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
888
889         return 0;
890 }
891
892 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
893 {
894         u64 gtod;
895
896         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
897                 return -EFAULT;
898
899         kvm_s390_set_tod_clock(kvm, gtod);
900         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
901         return 0;
902 }
903
904 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
905 {
906         int ret;
907
908         if (attr->flags)
909                 return -EINVAL;
910
911         switch (attr->attr) {
912         case KVM_S390_VM_TOD_HIGH:
913                 ret = kvm_s390_set_tod_high(kvm, attr);
914                 break;
915         case KVM_S390_VM_TOD_LOW:
916                 ret = kvm_s390_set_tod_low(kvm, attr);
917                 break;
918         default:
919                 ret = -ENXIO;
920                 break;
921         }
922         return ret;
923 }
924
925 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
926 {
927         u8 gtod_high = 0;
928
929         if (copy_to_user((void __user *)attr->addr, &gtod_high,
930                                          sizeof(gtod_high)))
931                 return -EFAULT;
932         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
933
934         return 0;
935 }
936
937 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
938 {
939         u64 gtod;
940
941         gtod = kvm_s390_get_tod_clock_fast(kvm);
942         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
943                 return -EFAULT;
944         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
945
946         return 0;
947 }
948
949 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
950 {
951         int ret;
952
953         if (attr->flags)
954                 return -EINVAL;
955
956         switch (attr->attr) {
957         case KVM_S390_VM_TOD_HIGH:
958                 ret = kvm_s390_get_tod_high(kvm, attr);
959                 break;
960         case KVM_S390_VM_TOD_LOW:
961                 ret = kvm_s390_get_tod_low(kvm, attr);
962                 break;
963         default:
964                 ret = -ENXIO;
965                 break;
966         }
967         return ret;
968 }
969
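/*
 * Set the guest CPU model (cpuid, IBC and facility list) from user space.
 * The IBC value is clamped to the range reported by the SCLP, and changes
 * are only allowed before the first VCPU has been created.
 */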
970 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
971 {
972         struct kvm_s390_vm_cpu_processor *proc;
973         u16 lowest_ibc, unblocked_ibc;
974         int ret = 0;
975
976         mutex_lock(&kvm->lock);
977         if (kvm->created_vcpus) {
978                 ret = -EBUSY;
979                 goto out;
980         }
981         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
982         if (!proc) {
983                 ret = -ENOMEM;
984                 goto out;
985         }
986         if (!copy_from_user(proc, (void __user *)attr->addr,
987                             sizeof(*proc))) {
988                 kvm->arch.model.cpuid = proc->cpuid;
989                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
990                 unblocked_ibc = sclp.ibc & 0xfff;
991                 if (lowest_ibc && proc->ibc) {
992                         if (proc->ibc > unblocked_ibc)
993                                 kvm->arch.model.ibc = unblocked_ibc;
994                         else if (proc->ibc < lowest_ibc)
995                                 kvm->arch.model.ibc = lowest_ibc;
996                         else
997                                 kvm->arch.model.ibc = proc->ibc;
998                 }
999                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1000                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1001                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1002                          kvm->arch.model.ibc,
1003                          kvm->arch.model.cpuid);
1004                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1005                          kvm->arch.model.fac_list[0],
1006                          kvm->arch.model.fac_list[1],
1007                          kvm->arch.model.fac_list[2]);
1008         } else
1009                 ret = -EFAULT;
1010         kfree(proc);
1011 out:
1012         mutex_unlock(&kvm->lock);
1013         return ret;
1014 }
1015
1016 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1017                                        struct kvm_device_attr *attr)
1018 {
1019         struct kvm_s390_vm_cpu_feat data;
1020         int ret = -EBUSY;
1021
1022         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1023                 return -EFAULT;
1024         if (!bitmap_subset((unsigned long *) data.feat,
1025                            kvm_s390_available_cpu_feat,
1026                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1027                 return -EINVAL;
1028
1029         mutex_lock(&kvm->lock);
1030         if (!atomic_read(&kvm->online_vcpus)) {
1031                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1032                             KVM_S390_VM_CPU_FEAT_NR_BITS);
1033                 ret = 0;
1034         }
1035         mutex_unlock(&kvm->lock);
1036         return ret;
1037 }
1038
1039 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1040                                           struct kvm_device_attr *attr)
1041 {
1042         /*
1043          * Once supported by kernel + hw, we have to store the subfunctions
1044          * in kvm->arch and remember that user space configured them.
1045          */
1046         return -ENXIO;
1047 }
1048
1049 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1050 {
1051         int ret = -ENXIO;
1052
1053         switch (attr->attr) {
1054         case KVM_S390_VM_CPU_PROCESSOR:
1055                 ret = kvm_s390_set_processor(kvm, attr);
1056                 break;
1057         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1058                 ret = kvm_s390_set_processor_feat(kvm, attr);
1059                 break;
1060         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1061                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1062                 break;
1063         }
1064         return ret;
1065 }
1066
1067 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1068 {
1069         struct kvm_s390_vm_cpu_processor *proc;
1070         int ret = 0;
1071
1072         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1073         if (!proc) {
1074                 ret = -ENOMEM;
1075                 goto out;
1076         }
1077         proc->cpuid = kvm->arch.model.cpuid;
1078         proc->ibc = kvm->arch.model.ibc;
1079         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1080                S390_ARCH_FAC_LIST_SIZE_BYTE);
1081         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1082                  kvm->arch.model.ibc,
1083                  kvm->arch.model.cpuid);
1084         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1085                  kvm->arch.model.fac_list[0],
1086                  kvm->arch.model.fac_list[1],
1087                  kvm->arch.model.fac_list[2]);
1088         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1089                 ret = -EFAULT;
1090         kfree(proc);
1091 out:
1092         return ret;
1093 }
1094
1095 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1096 {
1097         struct kvm_s390_vm_cpu_machine *mach;
1098         int ret = 0;
1099
1100         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1101         if (!mach) {
1102                 ret = -ENOMEM;
1103                 goto out;
1104         }
1105         get_cpu_id((struct cpuid *) &mach->cpuid);
1106         mach->ibc = sclp.ibc;
1107         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1108                S390_ARCH_FAC_LIST_SIZE_BYTE);
1109         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1110                sizeof(S390_lowcore.stfle_fac_list));
1111         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1112                  kvm->arch.model.ibc,
1113                  kvm->arch.model.cpuid);
1114         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1115                  mach->fac_mask[0],
1116                  mach->fac_mask[1],
1117                  mach->fac_mask[2]);
1118         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1119                  mach->fac_list[0],
1120                  mach->fac_list[1],
1121                  mach->fac_list[2]);
1122         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1123                 ret = -EFAULT;
1124         kfree(mach);
1125 out:
1126         return ret;
1127 }
1128
1129 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1130                                        struct kvm_device_attr *attr)
1131 {
1132         struct kvm_s390_vm_cpu_feat data;
1133
1134         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1135                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1136         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1137                 return -EFAULT;
1138         return 0;
1139 }
1140
1141 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1142                                      struct kvm_device_attr *attr)
1143 {
1144         struct kvm_s390_vm_cpu_feat data;
1145
1146         bitmap_copy((unsigned long *) data.feat,
1147                     kvm_s390_available_cpu_feat,
1148                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1149         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1150                 return -EFAULT;
1151         return 0;
1152 }
1153
1154 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1155                                           struct kvm_device_attr *attr)
1156 {
1157         /*
1158          * Once we can actually configure subfunctions (kernel + hw support),
1159          * we have to check if they were already set by user space, if so copy
1160          * them from kvm->arch.
1161          */
1162         return -ENXIO;
1163 }
1164
1165 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1166                                         struct kvm_device_attr *attr)
1167 {
1168         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1169             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1170                 return -EFAULT;
1171         return 0;
1172 }
1173 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1174 {
1175         int ret = -ENXIO;
1176
1177         switch (attr->attr) {
1178         case KVM_S390_VM_CPU_PROCESSOR:
1179                 ret = kvm_s390_get_processor(kvm, attr);
1180                 break;
1181         case KVM_S390_VM_CPU_MACHINE:
1182                 ret = kvm_s390_get_machine(kvm, attr);
1183                 break;
1184         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1185                 ret = kvm_s390_get_processor_feat(kvm, attr);
1186                 break;
1187         case KVM_S390_VM_CPU_MACHINE_FEAT:
1188                 ret = kvm_s390_get_machine_feat(kvm, attr);
1189                 break;
1190         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1191                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1192                 break;
1193         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1194                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1195                 break;
1196         }
1197         return ret;
1198 }
1199
1200 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1201 {
1202         int ret;
1203
1204         switch (attr->group) {
1205         case KVM_S390_VM_MEM_CTRL:
1206                 ret = kvm_s390_set_mem_control(kvm, attr);
1207                 break;
1208         case KVM_S390_VM_TOD:
1209                 ret = kvm_s390_set_tod(kvm, attr);
1210                 break;
1211         case KVM_S390_VM_CPU_MODEL:
1212                 ret = kvm_s390_set_cpu_model(kvm, attr);
1213                 break;
1214         case KVM_S390_VM_CRYPTO:
1215                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1216                 break;
1217         case KVM_S390_VM_MIGRATION:
1218                 ret = kvm_s390_vm_set_migration(kvm, attr);
1219                 break;
1220         default:
1221                 ret = -ENXIO;
1222                 break;
1223         }
1224
1225         return ret;
1226 }
1227
1228 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1229 {
1230         int ret;
1231
1232         switch (attr->group) {
1233         case KVM_S390_VM_MEM_CTRL:
1234                 ret = kvm_s390_get_mem_control(kvm, attr);
1235                 break;
1236         case KVM_S390_VM_TOD:
1237                 ret = kvm_s390_get_tod(kvm, attr);
1238                 break;
1239         case KVM_S390_VM_CPU_MODEL:
1240                 ret = kvm_s390_get_cpu_model(kvm, attr);
1241                 break;
1242         case KVM_S390_VM_MIGRATION:
1243                 ret = kvm_s390_vm_get_migration(kvm, attr);
1244                 break;
1245         default:
1246                 ret = -ENXIO;
1247                 break;
1248         }
1249
1250         return ret;
1251 }
1252
1253 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1254 {
1255         int ret;
1256
1257         switch (attr->group) {
1258         case KVM_S390_VM_MEM_CTRL:
1259                 switch (attr->attr) {
1260                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1261                 case KVM_S390_VM_MEM_CLR_CMMA:
1262                         ret = sclp.has_cmma ? 0 : -ENXIO;
1263                         break;
1264                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1265                         ret = 0;
1266                         break;
1267                 default:
1268                         ret = -ENXIO;
1269                         break;
1270                 }
1271                 break;
1272         case KVM_S390_VM_TOD:
1273                 switch (attr->attr) {
1274                 case KVM_S390_VM_TOD_LOW:
1275                 case KVM_S390_VM_TOD_HIGH:
1276                         ret = 0;
1277                         break;
1278                 default:
1279                         ret = -ENXIO;
1280                         break;
1281                 }
1282                 break;
1283         case KVM_S390_VM_CPU_MODEL:
1284                 switch (attr->attr) {
1285                 case KVM_S390_VM_CPU_PROCESSOR:
1286                 case KVM_S390_VM_CPU_MACHINE:
1287                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1288                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1289                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1290                         ret = 0;
1291                         break;
1292                 /* configuring subfunctions is not supported yet */
1293                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1294                 default:
1295                         ret = -ENXIO;
1296                         break;
1297                 }
1298                 break;
1299         case KVM_S390_VM_CRYPTO:
1300                 switch (attr->attr) {
1301                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1302                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1303                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1304                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1305                         ret = 0;
1306                         break;
1307                 default:
1308                         ret = -ENXIO;
1309                         break;
1310                 }
1311                 break;
1312         case KVM_S390_VM_MIGRATION:
1313                 ret = 0;
1314                 break;
1315         default:
1316                 ret = -ENXIO;
1317                 break;
1318         }
1319
1320         return ret;
1321 }
1322
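/*
 * Export guest storage keys: read the key of each page in the requested
 * range via its host virtual address and copy the result to user space.
 * Returns KVM_S390_GET_SKEYS_NONE if the guest does not use storage keys.
 */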
1323 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1324 {
1325         uint8_t *keys;
1326         uint64_t hva;
1327         int srcu_idx, i, r = 0;
1328
1329         if (args->flags != 0)
1330                 return -EINVAL;
1331
1332         /* Is this guest using storage keys? */
1333         if (!mm_use_skey(current->mm))
1334                 return KVM_S390_GET_SKEYS_NONE;
1335
1336         /* Enforce sane limit on memory allocation */
1337         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1338                 return -EINVAL;
1339
1340         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1341         if (!keys)
1342                 return -ENOMEM;
1343
1344         down_read(&current->mm->mmap_sem);
1345         srcu_idx = srcu_read_lock(&kvm->srcu);
1346         for (i = 0; i < args->count; i++) {
1347                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1348                 if (kvm_is_error_hva(hva)) {
1349                         r = -EFAULT;
1350                         break;
1351                 }
1352
1353                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1354                 if (r)
1355                         break;
1356         }
1357         srcu_read_unlock(&kvm->srcu, srcu_idx);
1358         up_read(&current->mm->mmap_sem);
1359
1360         if (!r) {
1361                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1362                                  sizeof(uint8_t) * args->count);
1363                 if (r)
1364                         r = -EFAULT;
1365         }
1366
1367         kvfree(keys);
1368         return r;
1369 }
1370
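/*
 * Import guest storage keys: enable storage key handling for this mm if
 * necessary, then set the key of each page in the requested range. The
 * lowest order bit of each key is reserved and must be zero.
 */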
1371 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1372 {
1373         uint8_t *keys;
1374         uint64_t hva;
1375         int srcu_idx, i, r = 0;
1376
1377         if (args->flags != 0)
1378                 return -EINVAL;
1379
1380         /* Enforce sane limit on memory allocation */
1381         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1382                 return -EINVAL;
1383
1384         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1385         if (!keys)
1386                 return -ENOMEM;
1387
1388         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1389                            sizeof(uint8_t) * args->count);
1390         if (r) {
1391                 r = -EFAULT;
1392                 goto out;
1393         }
1394
1395         /* Enable storage key handling for the guest */
1396         r = s390_enable_skey();
1397         if (r)
1398                 goto out;
1399
1400         down_read(&current->mm->mmap_sem);
1401         srcu_idx = srcu_read_lock(&kvm->srcu);
1402         for (i = 0; i < args->count; i++) {
1403                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1404                 if (kvm_is_error_hva(hva)) {
1405                         r = -EFAULT;
1406                         break;
1407                 }
1408
1409                 /* Lowest order bit is reserved */
1410                 if (keys[i] & 0x01) {
1411                         r = -EINVAL;
1412                         break;
1413                 }
1414
1415                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1416                 if (r)
1417                         break;
1418         }
1419         srcu_read_unlock(&kvm->srcu, srcu_idx);
1420         up_read(&current->mm->mmap_sem);
1421 out:
1422         kvfree(keys);
1423         return r;
1424 }
1425
1426 /*
1427  * Base address and length must be sent at the start of each block, therefore
1428  * it's cheaper to send some clean data, as long as it's less than the size of
1429  * two longs.
1430  */
1431 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1432 /* for consistency */
1433 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1434
1435 /*
1436  * This function searches for the next page with dirty CMMA attributes, and
1437  * saves the attributes in the buffer up to either the end of the buffer or
1438  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1439  * no trailing clean bytes are saved.
1440  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1441  * output buffer will indicate 0 as length.
1442  */
1443 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1444                                   struct kvm_s390_cmma_log *args)
1445 {
1446         struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1447         unsigned long bufsize, hva, pgstev, i, next, cur;
1448         int srcu_idx, peek, r = 0, rr;
1449         u8 *res;
1450
1451         cur = args->start_gfn;
1452         i = next = pgstev = 0;
1453
1454         if (unlikely(!kvm->arch.use_cmma))
1455                 return -ENXIO;
1456         /* Invalid/unsupported flags were specified */
1457         if (args->flags & ~KVM_S390_CMMA_PEEK)
1458                 return -EINVAL;
1459         /* Query (not peek) of the dirty log, but no migration is in progress */
1460         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1461         if (!peek && !s)
1462                 return -EINVAL;
1463         /* CMMA is disabled or was not used, or the buffer has length zero */
1464         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1465         if (!bufsize || !kvm->mm->context.use_cmma) {
1466                 memset(args, 0, sizeof(*args));
1467                 return 0;
1468         }
1469
1470         if (!peek) {
1471                 /* We are not peeking, and there are no dirty pages */
1472                 if (!atomic64_read(&s->dirty_pages)) {
1473                         memset(args, 0, sizeof(*args));
1474                         return 0;
1475                 }
1476                 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1477                                     args->start_gfn);
1478                 if (cur >= s->bitmap_size)      /* nothing found, loop back */
1479                         cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1480                 if (cur >= s->bitmap_size) {    /* again! (very unlikely) */
1481                         memset(args, 0, sizeof(*args));
1482                         return 0;
1483                 }
1484                 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1485         }
1486
1487         res = vmalloc(bufsize);
1488         if (!res)
1489                 return -ENOMEM;
1490
1491         args->start_gfn = cur;
1492
1493         down_read(&kvm->mm->mmap_sem);
1494         srcu_idx = srcu_read_lock(&kvm->srcu);
1495         while (i < bufsize) {
1496                 hva = gfn_to_hva(kvm, cur);
1497                 if (kvm_is_error_hva(hva)) {
1498                         r = -EFAULT;
1499                         break;
1500                 }
1501                 /* decrement only if we actually flipped the bit to 0 */
1502                 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1503                         atomic64_dec(&s->dirty_pages);
1504                 r = get_pgste(kvm->mm, hva, &pgstev);
1505                 if (r < 0)
1506                         pgstev = 0;
1507                 /* save the value */
1508                 res[i++] = (pgstev >> 24) & 0x3;
1509                 /*
1510                  * if the next bit is too far away, stop.
1511                  * if we reached the previous "next", find the next one
1512                  */
1513                 if (!peek) {
1514                         if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1515                                 break;
1516                         if (cur == next)
1517                                 next = find_next_bit(s->pgste_bitmap,
1518                                                      s->bitmap_size, cur + 1);
1519                         /* reached the end of the bitmap or of the buffer, stop */
1520                         if ((next >= s->bitmap_size) ||
1521                             (next >= args->start_gfn + bufsize))
1522                                 break;
1523                 }
1524                 cur++;
1525         }
1526         srcu_read_unlock(&kvm->srcu, srcu_idx);
1527         up_read(&kvm->mm->mmap_sem);
1528         args->count = i;
1529         args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1530
1531         rr = copy_to_user((void __user *)args->values, res, args->count);
1532         if (rr)
1533                 r = -EFAULT;
1534
1535         vfree(res);
1536         return r;
1537 }
1538
1539 /*
1540  * This function sets the CMMA attributes for the given pages. If the input
1541  * buffer has zero length, no action is taken, otherwise the attributes are
1542  * set and the mm->context.use_cmma flag is set.
1543  */
1544 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1545                                   const struct kvm_s390_cmma_log *args)
1546 {
1547         unsigned long hva, mask, pgstev, i;
1548         uint8_t *bits;
1549         int srcu_idx, r = 0;
1550
1551         mask = args->mask;
1552
1553         if (!kvm->arch.use_cmma)
1554                 return -ENXIO;
1555         /* invalid/unsupported flags */
1556         if (args->flags != 0)
1557                 return -EINVAL;
1558         /* Enforce sane limit on memory allocation */
1559         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1560                 return -EINVAL;
1561         /* Nothing to do */
1562         if (args->count == 0)
1563                 return 0;
1564
1565         bits = vmalloc(sizeof(*bits) * args->count);
1566         if (!bits)
1567                 return -ENOMEM;
1568
1569         r = copy_from_user(bits, (void __user *)args->values, args->count);
1570         if (r) {
1571                 r = -EFAULT;
1572                 goto out;
1573         }
1574
1575         down_read(&kvm->mm->mmap_sem);
1576         srcu_idx = srcu_read_lock(&kvm->srcu);
1577         for (i = 0; i < args->count; i++) {
1578                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1579                 if (kvm_is_error_hva(hva)) {
1580                         r = -EFAULT;
1581                         break;
1582                 }
1583
1584                 pgstev = bits[i];
1585                 pgstev = pgstev << 24;
1586                 mask &= _PGSTE_GPS_USAGE_MASK;
1587                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1588         }
1589         srcu_read_unlock(&kvm->srcu, srcu_idx);
1590         up_read(&kvm->mm->mmap_sem);
1591
1592         if (!kvm->mm->context.use_cmma) {
1593                 down_write(&kvm->mm->mmap_sem);
1594                 kvm->mm->context.use_cmma = 1;
1595                 up_write(&kvm->mm->mmap_sem);
1596         }
1597 out:
1598         vfree(bits);
1599         return r;
1600 }
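/*
 * Illustrative user-space sketch (not part of this file): reading the CMMA
 * dirty log through the VM ioctl handled below.  The struct, ioctl and flag
 * names match the kvm_s390_cmma_log handling above; the vm_fd variable,
 * the buffer size and the handle() helper are assumptions made only for
 * this example.  Pass KVM_S390_CMMA_PEEK in .flags to read attributes
 * outside of migration mode.
 *
 *	__u8 values[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count     = sizeof(values),
 *		.flags     = 0,
 *		.values    = (__u64)(unsigned long)values,
 *	};
 *	if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) == 0)
 *		handle(values, log.count, log.remaining);
 */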
1601
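/*
 * Dispatcher for the VM-scoped ioctls: interrupt injection, capability
 * enablement, irqchip creation, device attributes, storage keys and the
 * CMMA migration log.  Unhandled ioctls return -ENOTTY.
 */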
1602 long kvm_arch_vm_ioctl(struct file *filp,
1603                        unsigned int ioctl, unsigned long arg)
1604 {
1605         struct kvm *kvm = filp->private_data;
1606         void __user *argp = (void __user *)arg;
1607         struct kvm_device_attr attr;
1608         int r;
1609
1610         switch (ioctl) {
1611         case KVM_S390_INTERRUPT: {
1612                 struct kvm_s390_interrupt s390int;
1613
1614                 r = -EFAULT;
1615                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1616                         break;
1617                 r = kvm_s390_inject_vm(kvm, &s390int);
1618                 break;
1619         }
1620         case KVM_ENABLE_CAP: {
1621                 struct kvm_enable_cap cap;
1622                 r = -EFAULT;
1623                 if (copy_from_user(&cap, argp, sizeof(cap)))
1624                         break;
1625                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1626                 break;
1627         }
1628         case KVM_CREATE_IRQCHIP: {
1629                 struct kvm_irq_routing_entry routing;
1630
1631                 r = -EINVAL;
1632                 if (kvm->arch.use_irqchip) {
1633                         /* Set up dummy routing. */
1634                         memset(&routing, 0, sizeof(routing));
1635                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1636                 }
1637                 break;
1638         }
1639         case KVM_SET_DEVICE_ATTR: {
1640                 r = -EFAULT;
1641                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1642                         break;
1643                 r = kvm_s390_vm_set_attr(kvm, &attr);
1644                 break;
1645         }
1646         case KVM_GET_DEVICE_ATTR: {
1647                 r = -EFAULT;
1648                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1649                         break;
1650                 r = kvm_s390_vm_get_attr(kvm, &attr);
1651                 break;
1652         }
1653         case KVM_HAS_DEVICE_ATTR: {
1654                 r = -EFAULT;
1655                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1656                         break;
1657                 r = kvm_s390_vm_has_attr(kvm, &attr);
1658                 break;
1659         }
1660         case KVM_S390_GET_SKEYS: {
1661                 struct kvm_s390_skeys args;
1662
1663                 r = -EFAULT;
1664                 if (copy_from_user(&args, argp,
1665                                    sizeof(struct kvm_s390_skeys)))
1666                         break;
1667                 r = kvm_s390_get_skeys(kvm, &args);
1668                 break;
1669         }
1670         case KVM_S390_SET_SKEYS: {
1671                 struct kvm_s390_skeys args;
1672
1673                 r = -EFAULT;
1674                 if (copy_from_user(&args, argp,
1675                                    sizeof(struct kvm_s390_skeys)))
1676                         break;
1677                 r = kvm_s390_set_skeys(kvm, &args);
1678                 break;
1679         }
1680         case KVM_S390_GET_CMMA_BITS: {
1681                 struct kvm_s390_cmma_log args;
1682
1683                 r = -EFAULT;
1684                 if (copy_from_user(&args, argp, sizeof(args)))
1685                         break;
1686                 r = kvm_s390_get_cmma_bits(kvm, &args);
1687                 if (!r) {
1688                         r = copy_to_user(argp, &args, sizeof(args));
1689                         if (r)
1690                                 r = -EFAULT;
1691                 }
1692                 break;
1693         }
1694         case KVM_S390_SET_CMMA_BITS: {
1695                 struct kvm_s390_cmma_log args;
1696
1697                 r = -EFAULT;
1698                 if (copy_from_user(&args, argp, sizeof(args)))
1699                         break;
1700                 r = kvm_s390_set_cmma_bits(kvm, &args);
1701                 break;
1702         }
1703         default:
1704                 r = -ENOTTY;
1705         }
1706
1707         return r;
1708 }
1709
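/*
 * Query the Adjunct Processor configuration with PQAP(QCI).  The
 * instruction is emitted directly (.long 0xb2af0000) and fills the
 * 128-byte config buffer; the condition code is returned, and the
 * exception table entry lets a program check be handled gracefully.
 */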
1710 static int kvm_s390_query_ap_config(u8 *config)
1711 {
1712         u32 fcn_code = 0x04000000UL;
1713         u32 cc = 0;
1714
1715         memset(config, 0, 128);
1716         asm volatile(
1717                 "lgr 0,%1\n"
1718                 "lgr 2,%2\n"
1719                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1720                 "0: ipm %0\n"
1721                 "srl %0,28\n"
1722                 "1:\n"
1723                 EX_TABLE(0b, 1b)
1724                 : "+r" (cc)
1725                 : "r" (fcn_code), "r" (config)
1726                 : "cc", "0", "2", "memory"
1727         );
1728
1729         return cc;
1730 }
1731
1732 static int kvm_s390_apxa_installed(void)
1733 {
1734         u8 config[128];
1735         int cc;
1736
1737         if (test_facility(12)) {
1738                 cc = kvm_s390_query_ap_config(config);
1739
1740                 if (cc)
1741                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1742                 else
1743                         return config[0] & 0x40;
1744         }
1745
1746         return 0;
1747 }
1748
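/*
 * Point the crypto control block descriptor at the crycb and select
 * format 2 when the APXA facility is installed, format 1 otherwise.
 */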
1749 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1750 {
1751         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1752
1753         if (kvm_s390_apxa_installed())
1754                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1755         else
1756                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1757 }
1758
1759 static u64 kvm_s390_get_initial_cpuid(void)
1760 {
1761         struct cpuid cpuid;
1762
1763         get_cpu_id(&cpuid);
1764         cpuid.version = 0xff;
1765         return *((u64 *) &cpuid);
1766 }
1767
1768 static void kvm_s390_crypto_init(struct kvm *kvm)
1769 {
1770         if (!test_kvm_facility(kvm, 76))
1771                 return;
1772
1773         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1774         kvm_s390_set_crycb_format(kvm);
1775
1776         /* Enable AES/DEA protected key functions by default */
1777         kvm->arch.crypto.aes_kw = 1;
1778         kvm->arch.crypto.dea_kw = 1;
1779         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1780                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1781         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1782                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1783 }
1784
1785 static void sca_dispose(struct kvm *kvm)
1786 {
1787         if (kvm->arch.use_esca)
1788                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1789         else
1790                 free_page((unsigned long)(kvm->arch.sca));
1791         kvm->arch.sca = NULL;
1792 }
1793
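/*
 * Create a new VM: validate the requested type, enable SIE, allocate the
 * basic SCA and sie_page2, derive the facility mask/list and CPU model,
 * initialize crypto and floating-interrupt state and, unless this is a
 * ucontrol VM, create the guest address space (gmap).
 */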
1794 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1795 {
1796         gfp_t alloc_flags = GFP_KERNEL;
1797         int i, rc;
1798         char debug_name[16];
1799         static unsigned long sca_offset;
1800
1801         rc = -EINVAL;
1802 #ifdef CONFIG_KVM_S390_UCONTROL
1803         if (type & ~KVM_VM_S390_UCONTROL)
1804                 goto out_err;
1805         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1806                 goto out_err;
1807 #else
1808         if (type)
1809                 goto out_err;
1810 #endif
1811
1812         rc = s390_enable_sie();
1813         if (rc)
1814                 goto out_err;
1815
1816         rc = -ENOMEM;
1817
1818         ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1819
1820         kvm->arch.use_esca = 0; /* start with basic SCA */
1821         if (!sclp.has_64bscao)
1822                 alloc_flags |= GFP_DMA;
1823         rwlock_init(&kvm->arch.sca_lock);
1824         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1825         if (!kvm->arch.sca)
1826                 goto out_err;
1827         spin_lock(&kvm_lock);
1828         sca_offset += 16;
1829         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1830                 sca_offset = 0;
1831         kvm->arch.sca = (struct bsca_block *)
1832                         ((char *) kvm->arch.sca + sca_offset);
1833         spin_unlock(&kvm_lock);
1834
1835         sprintf(debug_name, "kvm-%u", current->pid);
1836
1837         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1838         if (!kvm->arch.dbf)
1839                 goto out_err;
1840
1841         kvm->arch.sie_page2 =
1842              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1843         if (!kvm->arch.sie_page2)
1844                 goto out_err;
1845
1846         /* Populate the facility mask initially. */
1847         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1848                sizeof(S390_lowcore.stfle_fac_list));
1849         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1850                 if (i < kvm_s390_fac_list_mask_size())
1851                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1852                 else
1853                         kvm->arch.model.fac_mask[i] = 0UL;
1854         }
1855
1856         /* Populate the facility list initially. */
1857         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1858         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1859                S390_ARCH_FAC_LIST_SIZE_BYTE);
1860
1861         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1862         set_kvm_facility(kvm->arch.model.fac_list, 74);
1863
1864         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1865         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1866
1867         kvm_s390_crypto_init(kvm);
1868
1869         mutex_init(&kvm->arch.float_int.ais_lock);
1870         kvm->arch.float_int.simm = 0;
1871         kvm->arch.float_int.nimm = 0;
1872         spin_lock_init(&kvm->arch.float_int.lock);
1873         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1874                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1875         init_waitqueue_head(&kvm->arch.ipte_wq);
1876         mutex_init(&kvm->arch.ipte_mutex);
1877
1878         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1879         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1880
1881         if (type & KVM_VM_S390_UCONTROL) {
1882                 kvm->arch.gmap = NULL;
1883                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1884         } else {
1885                 if (sclp.hamax == U64_MAX)
1886                         kvm->arch.mem_limit = TASK_SIZE_MAX;
1887                 else
1888                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1889                                                     sclp.hamax + 1);
1890                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1891                 if (!kvm->arch.gmap)
1892                         goto out_err;
1893                 kvm->arch.gmap->private = kvm;
1894                 kvm->arch.gmap->pfault_enabled = 0;
1895         }
1896
1897         kvm->arch.css_support = 0;
1898         kvm->arch.use_irqchip = 0;
1899         kvm->arch.epoch = 0;
1900
1901         spin_lock_init(&kvm->arch.start_stop_lock);
1902         kvm_s390_vsie_init(kvm);
1903         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1904
1905         return 0;
1906 out_err:
1907         free_page((unsigned long)kvm->arch.sie_page2);
1908         debug_unregister(kvm->arch.dbf);
1909         sca_dispose(kvm);
1910         KVM_EVENT(3, "creation of vm failed: %d", rc);
1911         return rc;
1912 }
1913
1914 bool kvm_arch_has_vcpu_debugfs(void)
1915 {
1916         return false;
1917 }
1918
1919 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1920 {
1921         return 0;
1922 }
1923
1924 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1925 {
1926         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1927         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1928         kvm_s390_clear_local_irqs(vcpu);
1929         kvm_clear_async_pf_completion_queue(vcpu);
1930         if (!kvm_is_ucontrol(vcpu->kvm))
1931                 sca_del_vcpu(vcpu);
1932
1933         if (kvm_is_ucontrol(vcpu->kvm))
1934                 gmap_remove(vcpu->arch.gmap);
1935
1936         if (vcpu->kvm->arch.use_cmma)
1937                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1938         free_page((unsigned long)(vcpu->arch.sie_block));
1939
1940         kvm_vcpu_uninit(vcpu);
1941         kmem_cache_free(kvm_vcpu_cache, vcpu);
1942 }
1943
1944 static void kvm_free_vcpus(struct kvm *kvm)
1945 {
1946         unsigned int i;
1947         struct kvm_vcpu *vcpu;
1948
1949         kvm_for_each_vcpu(i, vcpu, kvm)
1950                 kvm_arch_vcpu_destroy(vcpu);
1951
1952         mutex_lock(&kvm->lock);
1953         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1954                 kvm->vcpus[i] = NULL;
1955
1956         atomic_set(&kvm->online_vcpus, 0);
1957         mutex_unlock(&kvm->lock);
1958 }
1959
1960 void kvm_arch_destroy_vm(struct kvm *kvm)
1961 {
1962         kvm_free_vcpus(kvm);
1963         sca_dispose(kvm);
1964         debug_unregister(kvm->arch.dbf);
1965         free_page((unsigned long)kvm->arch.sie_page2);
1966         if (!kvm_is_ucontrol(kvm))
1967                 gmap_remove(kvm->arch.gmap);
1968         kvm_s390_destroy_adapters(kvm);
1969         kvm_s390_clear_float_irqs(kvm);
1970         kvm_s390_vsie_destroy(kvm);
1971         if (kvm->arch.migration_state) {
1972                 vfree(kvm->arch.migration_state->pgste_bitmap);
1973                 kfree(kvm->arch.migration_state);
1974         }
1975         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1976 }
1977
1978 /* Section: vcpu related */
1979 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1980 {
1981         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1982         if (!vcpu->arch.gmap)
1983                 return -ENOMEM;
1984         vcpu->arch.gmap->private = vcpu->kvm;
1985
1986         return 0;
1987 }
1988
1989 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1990 {
1991         if (!kvm_s390_use_sca_entries())
1992                 return;
1993         read_lock(&vcpu->kvm->arch.sca_lock);
1994         if (vcpu->kvm->arch.use_esca) {
1995                 struct esca_block *sca = vcpu->kvm->arch.sca;
1996
1997                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1998                 sca->cpu[vcpu->vcpu_id].sda = 0;
1999         } else {
2000                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2001
2002                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2003                 sca->cpu[vcpu->vcpu_id].sda = 0;
2004         }
2005         read_unlock(&vcpu->kvm->arch.sca_lock);
2006 }
2007
2008 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2009 {
2010         if (!kvm_s390_use_sca_entries()) {
2011                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2012
2013                 /* we still need the basic sca for the ipte control */
2014                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2015                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2016         }
2017         read_lock(&vcpu->kvm->arch.sca_lock);
2018         if (vcpu->kvm->arch.use_esca) {
2019                 struct esca_block *sca = vcpu->kvm->arch.sca;
2020
2021                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2022                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2023                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2024                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2025                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2026         } else {
2027                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2028
2029                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2030                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2031                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2032                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2033         }
2034         read_unlock(&vcpu->kvm->arch.sca_lock);
2035 }
2036
2037 /* Basic SCA to Extended SCA data copy routines */
2038 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2039 {
2040         d->sda = s->sda;
2041         d->sigp_ctrl.c = s->sigp_ctrl.c;
2042         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2043 }
2044
2045 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2046 {
2047         int i;
2048
2049         d->ipte_control = s->ipte_control;
2050         d->mcn[0] = s->mcn;
2051         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2052                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2053 }
2054
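/*
 * Replace the basic SCA with an extended SCA.  All VCPUs are blocked
 * while the entries are copied under the sca_lock, every sie_block is
 * repointed at the new origin with ECB2_ESCA set, and the old basic SCA
 * page is freed afterwards.
 */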
2055 static int sca_switch_to_extended(struct kvm *kvm)
2056 {
2057         struct bsca_block *old_sca = kvm->arch.sca;
2058         struct esca_block *new_sca;
2059         struct kvm_vcpu *vcpu;
2060         unsigned int vcpu_idx;
2061         u32 scaol, scaoh;
2062
2063         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2064         if (!new_sca)
2065                 return -ENOMEM;
2066
2067         scaoh = (u32)((u64)(new_sca) >> 32);
2068         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2069
2070         kvm_s390_vcpu_block_all(kvm);
2071         write_lock(&kvm->arch.sca_lock);
2072
2073         sca_copy_b_to_e(new_sca, old_sca);
2074
2075         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2076                 vcpu->arch.sie_block->scaoh = scaoh;
2077                 vcpu->arch.sie_block->scaol = scaol;
2078                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2079         }
2080         kvm->arch.sca = new_sca;
2081         kvm->arch.use_esca = 1;
2082
2083         write_unlock(&kvm->arch.sca_lock);
2084         kvm_s390_vcpu_unblock_all(kvm);
2085
2086         free_page((unsigned long)old_sca);
2087
2088         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2089                  old_sca, kvm->arch.sca);
2090         return 0;
2091 }
2092
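/*
 * Check whether a VCPU with the given id fits into the current SCA.
 * Ids beyond the basic SCA slots trigger a switch to the extended SCA,
 * provided the machine offers ESCA and the 64-bit SCA origin.
 */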
2093 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2094 {
2095         int rc;
2096
2097         if (!kvm_s390_use_sca_entries()) {
2098                 if (id < KVM_MAX_VCPUS)
2099                         return true;
2100                 return false;
2101         }
2102         if (id < KVM_S390_BSCA_CPU_SLOTS)
2103                 return true;
2104         if (!sclp.has_esca || !sclp.has_64bscao)
2105                 return false;
2106
2107         mutex_lock(&kvm->lock);
2108         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2109         mutex_unlock(&kvm->lock);
2110
2111         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2112 }
2113
2114 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2115 {
2116         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2117         kvm_clear_async_pf_completion_queue(vcpu);
2118         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2119                                     KVM_SYNC_GPRS |
2120                                     KVM_SYNC_ACRS |
2121                                     KVM_SYNC_CRS |
2122                                     KVM_SYNC_ARCH0 |
2123                                     KVM_SYNC_PFAULT;
2124         kvm_s390_set_prefix(vcpu, 0);
2125         if (test_kvm_facility(vcpu->kvm, 64))
2126                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2127         if (test_kvm_facility(vcpu->kvm, 133))
2128                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2129         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2130          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2131          */
2132         if (MACHINE_HAS_VX)
2133                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2134         else
2135                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2136
2137         if (kvm_is_ucontrol(vcpu->kvm))
2138                 return __kvm_ucontrol_vcpu_init(vcpu);
2139
2140         return 0;
2141 }
2142
2143 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2144 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2145 {
2146         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2147         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2148         vcpu->arch.cputm_start = get_tod_clock_fast();
2149         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2150 }
2151
2152 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2153 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2154 {
2155         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2156         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2157         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2158         vcpu->arch.cputm_start = 0;
2159         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2160 }
2161
2162 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2163 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2164 {
2165         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2166         vcpu->arch.cputm_enabled = true;
2167         __start_cpu_timer_accounting(vcpu);
2168 }
2169
2170 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2171 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2172 {
2173         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2174         __stop_cpu_timer_accounting(vcpu);
2175         vcpu->arch.cputm_enabled = false;
2176 }
2177
2178 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2179 {
2180         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2181         __enable_cpu_timer_accounting(vcpu);
2182         preempt_enable();
2183 }
2184
2185 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2186 {
2187         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2188         __disable_cpu_timer_accounting(vcpu);
2189         preempt_enable();
2190 }
2191
2192 /* set the cpu timer - may only be called from the VCPU thread itself */
2193 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2194 {
2195         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2196         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2197         if (vcpu->arch.cputm_enabled)
2198                 vcpu->arch.cputm_start = get_tod_clock_fast();
2199         vcpu->arch.sie_block->cputm = cputm;
2200         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2201         preempt_enable();
2202 }
2203
2204 /* update and get the cpu timer - can also be called from other VCPU threads */
2205 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2206 {
2207         unsigned int seq;
2208         __u64 value;
2209
2210         if (unlikely(!vcpu->arch.cputm_enabled))
2211                 return vcpu->arch.sie_block->cputm;
2212
2213         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2214         do {
2215                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2216                 /*
2217                  * If the writer would ever execute a read in the critical
2218                  * section, e.g. in irq context, we have a deadlock.
2219                  */
2220                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2221                 value = vcpu->arch.sie_block->cputm;
2222                 /* if cputm_start is 0, accounting is being started/stopped */
2223                 if (likely(vcpu->arch.cputm_start))
2224                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2225         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2226         preempt_enable();
2227         return value;
2228 }
2229
2230 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2231 {
2232
2233         gmap_enable(vcpu->arch.enabled_gmap);
2234         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2235         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2236                 __start_cpu_timer_accounting(vcpu);
2237         vcpu->cpu = cpu;
2238 }
2239
2240 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2241 {
2242         vcpu->cpu = -1;
2243         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2244                 __stop_cpu_timer_accounting(vcpu);
2245         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2246         vcpu->arch.enabled_gmap = gmap_get_enabled();
2247         gmap_disable(vcpu->arch.enabled_gmap);
2248
2249 }
2250
2251 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2252 {
2253         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2254         vcpu->arch.sie_block->gpsw.mask = 0UL;
2255         vcpu->arch.sie_block->gpsw.addr = 0UL;
2256         kvm_s390_set_prefix(vcpu, 0);
2257         kvm_s390_set_cpu_timer(vcpu, 0);
2258         vcpu->arch.sie_block->ckc       = 0UL;
2259         vcpu->arch.sie_block->todpr     = 0;
2260         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2261         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2262         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2263         /* make sure the new fpc will be lazily loaded */
2264         save_fpu_regs();
2265         current->thread.fpu.fpc = 0;
2266         vcpu->arch.sie_block->gbea = 1;
2267         vcpu->arch.sie_block->pp = 0;
2268         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2269         kvm_clear_async_pf_completion_queue(vcpu);
2270         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2271                 kvm_s390_vcpu_stop(vcpu);
2272         kvm_s390_clear_local_irqs(vcpu);
2273 }
2274
2275 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2276 {
2277         mutex_lock(&vcpu->kvm->lock);
2278         preempt_disable();
2279         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2280         preempt_enable();
2281         mutex_unlock(&vcpu->kvm->lock);
2282         if (!kvm_is_ucontrol(vcpu->kvm)) {
2283                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2284                 sca_add_vcpu(vcpu);
2285         }
2286         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2287                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2288         /* make vcpu_load load the right gmap on the first trigger */
2289         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2290 }
2291
2292 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2293 {
2294         if (!test_kvm_facility(vcpu->kvm, 76))
2295                 return;
2296
2297         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2298
2299         if (vcpu->kvm->arch.crypto.aes_kw)
2300                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2301         if (vcpu->kvm->arch.crypto.dea_kw)
2302                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2303
2304         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2305 }
2306
2307 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2308 {
2309         free_page(vcpu->arch.sie_block->cbrlo);
2310         vcpu->arch.sie_block->cbrlo = 0;
2311 }
2312
2313 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2314 {
2315         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2316         if (!vcpu->arch.sie_block->cbrlo)
2317                 return -ENOMEM;
2318
2319         vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2320         return 0;
2321 }
2322
2323 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2324 {
2325         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2326
2327         vcpu->arch.sie_block->ibc = model->ibc;
2328         if (test_kvm_facility(vcpu->kvm, 7))
2329                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2330 }
2331
2332 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2333 {
2334         int rc = 0;
2335
2336         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2337                                                     CPUSTAT_SM |
2338                                                     CPUSTAT_STOPPED);
2339
2340         if (test_kvm_facility(vcpu->kvm, 78))
2341                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2342         else if (test_kvm_facility(vcpu->kvm, 8))
2343                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2344
2345         kvm_s390_vcpu_setup_model(vcpu);
2346
2347         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2348         if (MACHINE_HAS_ESOP)
2349                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2350         if (test_kvm_facility(vcpu->kvm, 9))
2351                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2352         if (test_kvm_facility(vcpu->kvm, 73))
2353                 vcpu->arch.sie_block->ecb |= ECB_TE;
2354
2355         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2356                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2357         if (test_kvm_facility(vcpu->kvm, 130))
2358                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2359         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2360         if (sclp.has_cei)
2361                 vcpu->arch.sie_block->eca |= ECA_CEI;
2362         if (sclp.has_ib)
2363                 vcpu->arch.sie_block->eca |= ECA_IB;
2364         if (sclp.has_siif)
2365                 vcpu->arch.sie_block->eca |= ECA_SII;
2366         if (sclp.has_sigpif)
2367                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2368         if (test_kvm_facility(vcpu->kvm, 129)) {
2369                 vcpu->arch.sie_block->eca |= ECA_VX;
2370                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2371         }
2372         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2373                                         | SDNXC;
2374         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2375
2376         if (sclp.has_kss)
2377                 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2378         else
2379                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2380
2381         if (vcpu->kvm->arch.use_cmma) {
2382                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2383                 if (rc)
2384                         return rc;
2385         }
2386         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2387         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2388
2389         kvm_s390_vcpu_crypto_setup(vcpu);
2390
2391         return rc;
2392 }
2393
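/*
 * Allocate and wire up a new VCPU: the vcpu structure, the SIE control
 * block page (including the ITDB), local interrupt state and the CPU
 * timer seqcount, then register it with common KVM code.
 */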
2394 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2395                                       unsigned int id)
2396 {
2397         struct kvm_vcpu *vcpu;
2398         struct sie_page *sie_page;
2399         int rc = -EINVAL;
2400
2401         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2402                 goto out;
2403
2404         rc = -ENOMEM;
2405
2406         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2407         if (!vcpu)
2408                 goto out;
2409
2410         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2411         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2412         if (!sie_page)
2413                 goto out_free_cpu;
2414
2415         vcpu->arch.sie_block = &sie_page->sie_block;
2416         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2417
2418         /* the real guest size will always be smaller than msl */
2419         vcpu->arch.sie_block->mso = 0;
2420         vcpu->arch.sie_block->msl = sclp.hamax;
2421
2422         vcpu->arch.sie_block->icpua = id;
2423         spin_lock_init(&vcpu->arch.local_int.lock);
2424         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2425         vcpu->arch.local_int.wq = &vcpu->wq;
2426         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2427         seqcount_init(&vcpu->arch.cputm_seqcount);
2428
2429         rc = kvm_vcpu_init(vcpu, kvm, id);
2430         if (rc)
2431                 goto out_free_sie_block;
2432         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2433                  vcpu->arch.sie_block);
2434         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2435
2436         return vcpu;
2437 out_free_sie_block:
2438         free_page((unsigned long)(vcpu->arch.sie_block));
2439 out_free_cpu:
2440         kmem_cache_free(kvm_vcpu_cache, vcpu);
2441 out:
2442         return ERR_PTR(rc);
2443 }
2444
2445 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2446 {
2447         return kvm_s390_vcpu_has_irq(vcpu, 0);
2448 }
2449
2450 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2451 {
2452         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2453 }
2454
2455 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2456 {
2457         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2458         exit_sie(vcpu);
2459 }
2460
2461 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2462 {
2463         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2464 }
2465
2466 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2467 {
2468         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2469         exit_sie(vcpu);
2470 }
2471
2472 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2473 {
2474         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2475 }
2476
2477 /*
2478  * Kick a guest cpu out of SIE and wait until SIE is not running.
2479  * If the CPU is not running (e.g. waiting as idle) the function will
2480  * return immediately.
 */
2481 void exit_sie(struct kvm_vcpu *vcpu)
2482 {
2483         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2484         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2485                 cpu_relax();
2486 }
2487
2488 /* Kick a guest cpu out of SIE to process a request synchronously */
2489 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2490 {
2491         kvm_make_request(req, vcpu);
2492         kvm_s390_vcpu_request(vcpu);
2493 }
2494
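/*
 * gmap invalidation notifier: when host mappings below 2 GB change,
 * every VCPU whose prefix pages intersect the invalidated range is asked
 * to reload its MMU state via KVM_REQ_MMU_RELOAD.
 */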
2495 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2496                               unsigned long end)
2497 {
2498         struct kvm *kvm = gmap->private;
2499         struct kvm_vcpu *vcpu;
2500         unsigned long prefix;
2501         int i;
2502
2503         if (gmap_is_shadow(gmap))
2504                 return;
2505         if (start >= 1UL << 31)
2506                 /* We are only interested in prefix pages */
2507                 return;
2508         kvm_for_each_vcpu(i, vcpu, kvm) {
2509                 /* match against both prefix pages */
2510                 prefix = kvm_s390_get_prefix(vcpu);
2511                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2512                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2513                                    start, end);
2514                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2515                 }
2516         }
2517 }
2518
2519 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2520 {
2521         /* kvm common code refers to this, but never calls it */
2522         BUG();
2523         return 0;
2524 }
2525
2526 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2527                                            struct kvm_one_reg *reg)
2528 {
2529         int r = -EINVAL;
2530
2531         switch (reg->id) {
2532         case KVM_REG_S390_TODPR:
2533                 r = put_user(vcpu->arch.sie_block->todpr,
2534                              (u32 __user *)reg->addr);
2535                 break;
2536         case KVM_REG_S390_EPOCHDIFF:
2537                 r = put_user(vcpu->arch.sie_block->epoch,
2538                              (u64 __user *)reg->addr);
2539                 break;
2540         case KVM_REG_S390_CPU_TIMER:
2541                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2542                              (u64 __user *)reg->addr);
2543                 break;
2544         case KVM_REG_S390_CLOCK_COMP:
2545                 r = put_user(vcpu->arch.sie_block->ckc,
2546                              (u64 __user *)reg->addr);
2547                 break;
2548         case KVM_REG_S390_PFTOKEN:
2549                 r = put_user(vcpu->arch.pfault_token,
2550                              (u64 __user *)reg->addr);
2551                 break;
2552         case KVM_REG_S390_PFCOMPARE:
2553                 r = put_user(vcpu->arch.pfault_compare,
2554                              (u64 __user *)reg->addr);
2555                 break;
2556         case KVM_REG_S390_PFSELECT:
2557                 r = put_user(vcpu->arch.pfault_select,
2558                              (u64 __user *)reg->addr);
2559                 break;
2560         case KVM_REG_S390_PP:
2561                 r = put_user(vcpu->arch.sie_block->pp,
2562                              (u64 __user *)reg->addr);
2563                 break;
2564         case KVM_REG_S390_GBEA:
2565                 r = put_user(vcpu->arch.sie_block->gbea,
2566                              (u64 __user *)reg->addr);
2567                 break;
2568         default:
2569                 break;
2570         }
2571
2572         return r;
2573 }
2574
2575 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2576                                            struct kvm_one_reg *reg)
2577 {
2578         int r = -EINVAL;
2579         __u64 val;
2580
2581         switch (reg->id) {
2582         case KVM_REG_S390_TODPR:
2583                 r = get_user(vcpu->arch.sie_block->todpr,
2584                              (u32 __user *)reg->addr);
2585                 break;
2586         case KVM_REG_S390_EPOCHDIFF:
2587                 r = get_user(vcpu->arch.sie_block->epoch,
2588                              (u64 __user *)reg->addr);
2589                 break;
2590         case KVM_REG_S390_CPU_TIMER:
2591                 r = get_user(val, (u64 __user *)reg->addr);
2592                 if (!r)
2593                         kvm_s390_set_cpu_timer(vcpu, val);
2594                 break;
2595         case KVM_REG_S390_CLOCK_COMP:
2596                 r = get_user(vcpu->arch.sie_block->ckc,
2597                              (u64 __user *)reg->addr);
2598                 break;
2599         case KVM_REG_S390_PFTOKEN:
2600                 r = get_user(vcpu->arch.pfault_token,
2601                              (u64 __user *)reg->addr);
2602                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2603                         kvm_clear_async_pf_completion_queue(vcpu);
2604                 break;
2605         case KVM_REG_S390_PFCOMPARE:
2606                 r = get_user(vcpu->arch.pfault_compare,
2607                              (u64 __user *)reg->addr);
2608                 break;
2609         case KVM_REG_S390_PFSELECT:
2610                 r = get_user(vcpu->arch.pfault_select,
2611                              (u64 __user *)reg->addr);
2612                 break;
2613         case KVM_REG_S390_PP:
2614                 r = get_user(vcpu->arch.sie_block->pp,
2615                              (u64 __user *)reg->addr);
2616                 break;
2617         case KVM_REG_S390_GBEA:
2618                 r = get_user(vcpu->arch.sie_block->gbea,
2619                              (u64 __user *)reg->addr);
2620                 break;
2621         default:
2622                 break;
2623         }
2624
2625         return r;
2626 }
2627
2628 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2629 {
2630         kvm_s390_vcpu_initial_reset(vcpu);
2631         return 0;
2632 }
2633
2634 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2635 {
2636         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2637         return 0;
2638 }
2639
2640 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2641 {
2642         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2643         return 0;
2644 }
2645
2646 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2647                                   struct kvm_sregs *sregs)
2648 {
2649         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2650         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2651         return 0;
2652 }
2653
2654 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2655                                   struct kvm_sregs *sregs)
2656 {
2657         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2658         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2659         return 0;
2660 }
2661
2662 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2663 {
2664         if (test_fp_ctl(fpu->fpc))
2665                 return -EINVAL;
2666         vcpu->run->s.regs.fpc = fpu->fpc;
2667         if (MACHINE_HAS_VX)
2668                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2669                                  (freg_t *) fpu->fprs);
2670         else
2671                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2672         return 0;
2673 }
2674
2675 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2676 {
2677         /* make sure we have the latest values */
2678         save_fpu_regs();
2679         if (MACHINE_HAS_VX)
2680                 convert_vx_to_fp((freg_t *) fpu->fprs,
2681                                  (__vector128 *) vcpu->run->s.regs.vrs);
2682         else
2683                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2684         fpu->fpc = vcpu->run->s.regs.fpc;
2685         return 0;
2686 }
2687
2688 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2689 {
2690         int rc = 0;
2691
2692         if (!is_vcpu_stopped(vcpu))
2693                 rc = -EBUSY;
2694         else {
2695                 vcpu->run->psw_mask = psw.mask;
2696                 vcpu->run->psw_addr = psw.addr;
2697         }
2698         return rc;
2699 }
2700
2701 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2702                                   struct kvm_translation *tr)
2703 {
2704         return -EINVAL; /* not implemented yet */
2705 }
2706
2707 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2708                               KVM_GUESTDBG_USE_HW_BP | \
2709                               KVM_GUESTDBG_ENABLE)
2710
2711 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2712                                         struct kvm_guest_debug *dbg)
2713 {
2714         int rc = 0;
2715
2716         vcpu->guest_debug = 0;
2717         kvm_s390_clear_bp_data(vcpu);
2718
2719         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2720                 return -EINVAL;
2721         if (!sclp.has_gpere)
2722                 return -EINVAL;
2723
2724         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2725                 vcpu->guest_debug = dbg->control;
2726                 /* enforce guest PER */
2727                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2728
2729                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2730                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2731         } else {
2732                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2733                 vcpu->arch.guestdbg.last_bp = 0;
2734         }
2735
2736         if (rc) {
2737                 vcpu->guest_debug = 0;
2738                 kvm_s390_clear_bp_data(vcpu);
2739                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2740         }
2741
2742         return rc;
2743 }
2744
2745 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2746                                     struct kvm_mp_state *mp_state)
2747 {
2748         /* CHECK_STOP and LOAD are not supported yet */
2749         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2750                                        KVM_MP_STATE_OPERATING;
2751 }
2752
2753 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2754                                     struct kvm_mp_state *mp_state)
2755 {
2756         int rc = 0;
2757
2758         /* user space knows about this interface - let it control the state */
2759         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2760
2761         switch (mp_state->mp_state) {
2762         case KVM_MP_STATE_STOPPED:
2763                 kvm_s390_vcpu_stop(vcpu);
2764                 break;
2765         case KVM_MP_STATE_OPERATING:
2766                 kvm_s390_vcpu_start(vcpu);
2767                 break;
2768         case KVM_MP_STATE_LOAD:
2769         case KVM_MP_STATE_CHECK_STOP:
2770                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2771         default:
2772                 rc = -ENXIO;
2773         }
2774
2775         return rc;
2776 }
2777
2778 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2779 {
2780         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2781 }
2782
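/*
 * Process the pending VCPU requests before (re)entering SIE: re-protect
 * the prefix pages, flush the guest TLB, toggle IBS, enable operation
 * exception interception and switch CMMA interpretation off/on for
 * migration start/stop.
 */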
2783 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2784 {
2785 retry:
2786         kvm_s390_vcpu_request_handled(vcpu);
2787         if (!kvm_request_pending(vcpu))
2788                 return 0;
2789         /*
2790          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2791          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2792          * This ensures that the ipte instruction for this request has
2793          * already finished. We might race against a second unmapper that
2794  * wants to set the blocking bit. Let's just retry the request loop.
2795          */
2796         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2797                 int rc;
2798                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2799                                           kvm_s390_get_prefix(vcpu),
2800                                           PAGE_SIZE * 2, PROT_WRITE);
2801                 if (rc) {
2802                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2803                         return rc;
2804                 }
2805                 goto retry;
2806         }
2807
2808         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2809                 vcpu->arch.sie_block->ihcpu = 0xffff;
2810                 goto retry;
2811         }
2812
2813         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2814                 if (!ibs_enabled(vcpu)) {
2815                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2816                         atomic_or(CPUSTAT_IBS,
2817                                         &vcpu->arch.sie_block->cpuflags);
2818                 }
2819                 goto retry;
2820         }
2821
2822         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2823                 if (ibs_enabled(vcpu)) {
2824                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2825                         atomic_andnot(CPUSTAT_IBS,
2826                                           &vcpu->arch.sie_block->cpuflags);
2827                 }
2828                 goto retry;
2829         }
2830
2831         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2832                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2833                 goto retry;
2834         }
2835
2836         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2837                 /*
2838                  * Disable CMMA virtualization; we will emulate the ESSA
2839                  * instruction manually, in order to provide additional
2840                  * functionalities needed for live migration.
2841                  */
2842                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2843                 goto retry;
2844         }
2845
2846         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2847                 /*
2848                  * Re-enable CMMA virtualization if CMMA is available and
2849                  * was used.
2850                  */
2851                 if ((vcpu->kvm->arch.use_cmma) &&
2852                     (vcpu->kvm->mm->context.use_cmma))
2853                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2854                 goto retry;
2855         }
2856
2857         /* nothing to do, just clear the request */
2858         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2859
2860         return 0;
2861 }
2862
2863 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2864 {
2865         struct kvm_vcpu *vcpu;
2866         int i;
2867
2868         mutex_lock(&kvm->lock);
2869         preempt_disable();
2870         kvm->arch.epoch = tod - get_tod_clock();
2871         kvm_s390_vcpu_block_all(kvm);
2872         kvm_for_each_vcpu(i, vcpu, kvm)
2873                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2874         kvm_s390_vcpu_unblock_all(kvm);
2875         preempt_enable();
2876         mutex_unlock(&kvm->lock);
2877 }
2878
2879 /**
2880  * kvm_arch_fault_in_page - fault-in guest page if necessary
2881  * @vcpu: The corresponding virtual cpu
2882  * @gpa: Guest physical address
2883  * @writable: Whether the page should be writable or not
2884  *
2885  * Make sure that a guest page has been faulted-in on the host.
2886  *
2887  * Return: Zero on success, negative error code otherwise.
2888  */
2889 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2890 {
2891         return gmap_fault(vcpu->arch.gmap, gpa,
2892                           writable ? FAULT_FLAG_WRITE : 0);
2893 }
2894
2895 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2896                                       unsigned long token)
2897 {
2898         struct kvm_s390_interrupt inti;
2899         struct kvm_s390_irq irq;
2900
2901         if (start_token) {
2902                 irq.u.ext.ext_params2 = token;
2903                 irq.type = KVM_S390_INT_PFAULT_INIT;
2904                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2905         } else {
2906                 inti.type = KVM_S390_INT_PFAULT_DONE;
2907                 inti.parm64 = token;
2908                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2909         }
2910 }
2911
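/*
 * Arch hooks for the common async_pf infrastructure: "not present" injects
 * the PFAULT_INIT token, "present" injects the matching PFAULT_DONE token.
 */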
2912 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2913                                      struct kvm_async_pf *work)
2914 {
2915         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2916         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2917 }
2918
2919 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2920                                  struct kvm_async_pf *work)
2921 {
2922         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2923         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2924 }
2925
2926 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2927                                struct kvm_async_pf *work)
2928 {
2929         /* s390 will always inject the page directly */
2930 }
2931
2932 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2933 {
2934         /*
2935          * s390 will always inject the page directly,
2936          * but we still want kvm_check_async_pf_completion to clean up
2937          */
2938         return true;
2939 }
2940
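/*
 * Try to set up an asynchronous page fault for the address that just faulted
 * in SIE. Bail out (return 0) if pfault handshaking is not configured for
 * this VCPU, if the guest PSW/CR0 state does not allow delivering the
 * notification, if an interrupt is already pending, or if the pfault token
 * cannot be read from guest memory; the fault is then resolved synchronously.
 */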
2941 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2942 {
2943         hva_t hva;
2944         struct kvm_arch_async_pf arch;
2945         int rc;
2946
2947         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2948                 return 0;
2949         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2950             vcpu->arch.pfault_compare)
2951                 return 0;
2952         if (psw_extint_disabled(vcpu))
2953                 return 0;
2954         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2955                 return 0;
2956         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2957                 return 0;
2958         if (!vcpu->arch.gmap->pfault_enabled)
2959                 return 0;
2960
2961         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2962         hva += current->thread.gmap_addr & ~PAGE_MASK;
2963         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2964                 return 0;
2965
2966         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2967         return rc;
2968 }
2969
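/*
 * Per-entry housekeeping before (re)entering SIE: complete pending async
 * pfaults, shadow gprs 14/15 into the SIE block, give the host a chance to
 * reschedule and handle machine checks, deliver pending guest interrupts,
 * process KVM requests and prepare guest debugging state.
 */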
2970 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2971 {
2972         int rc, cpuflags;
2973
2974         /*
2975          * On s390, notifications for arriving pages will be delivered directly
2976          * to the guest, but the housekeeping for completed pfaults is
2977          * handled outside the worker.
2978          */
2979         kvm_check_async_pf_completion(vcpu);
2980
2981         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2982         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2983
2984         if (need_resched())
2985                 schedule();
2986
2987         if (test_cpu_flag(CIF_MCCK_PENDING))
2988                 s390_handle_mcck();
2989
2990         if (!kvm_is_ucontrol(vcpu->kvm)) {
2991                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2992                 if (rc)
2993                         return rc;
2994         }
2995
2996         rc = kvm_s390_handle_requests(vcpu);
2997         if (rc)
2998                 return rc;
2999
3000         if (guestdbg_enabled(vcpu)) {
3001                 kvm_s390_backup_guest_per_regs(vcpu);
3002                 kvm_s390_patch_guest_per_regs(vcpu);
3003         }
3004
3005         vcpu->arch.sie_block->icptcode = 0;
3006         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3007         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3008         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3009
3010         return 0;
3011 }
3012
3013 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3014 {
3015         struct kvm_s390_pgm_info pgm_info = {
3016                 .code = PGM_ADDRESSING,
3017         };
3018         u8 opcode, ilen;
3019         int rc;
3020
3021         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3022         trace_kvm_s390_sie_fault(vcpu);
3023
3024         /*
3025          * We want to inject an addressing exception, which is defined as a
3026          * suppressing or terminating exception. However, since we came here
3027          * by a DAT access exception, the PSW still points to the faulting
3028          * instruction since DAT exceptions are nullifying. So we've got
3029          * to look up the current opcode to get the length of the instruction
3030          * to be able to forward the PSW.
3031          */
3032         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3033         ilen = insn_length(opcode);
3034         if (rc < 0) {
3035                 return rc;
3036         } else if (rc) {
3037                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3038                  * Forward by arbitrary ilc, injection will take care of
3039                  * nullification if necessary.
3040                  */
3041                 pgm_info = vcpu->arch.pgm;
3042                 ilen = 4;
3043         }
3044         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3045         kvm_s390_forward_psw(vcpu, ilen);
3046         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3047 }
3048
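/*
 * Post-processing after SIE exited: restore the shadowed gprs, reinject a
 * host machine check that interrupted SIE, handle intercepts in the kernel
 * or forward them to userspace as KVM_EXIT_S390_SIEIC, and turn host page
 * faults on guest memory into either an async pfault, a synchronous
 * fault-in, a ucontrol exit or an addressing exception for the guest.
 */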
3049 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3050 {
3051         struct mcck_volatile_info *mcck_info;
3052         struct sie_page *sie_page;
3053
3054         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3055                    vcpu->arch.sie_block->icptcode);
3056         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3057
3058         if (guestdbg_enabled(vcpu))
3059                 kvm_s390_restore_guest_per_regs(vcpu);
3060
3061         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3062         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3063
3064         if (exit_reason == -EINTR) {
3065                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3066                 sie_page = container_of(vcpu->arch.sie_block,
3067                                         struct sie_page, sie_block);
3068                 mcck_info = &sie_page->mcck_info;
3069                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3070                 return 0;
3071         }
3072
3073         if (vcpu->arch.sie_block->icptcode > 0) {
3074                 int rc = kvm_handle_sie_intercept(vcpu);
3075
3076                 if (rc != -EOPNOTSUPP)
3077                         return rc;
3078                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3079                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3080                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3081                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3082                 return -EREMOTE;
3083         } else if (exit_reason != -EFAULT) {
3084                 vcpu->stat.exit_null++;
3085                 return 0;
3086         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3087                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3088                 vcpu->run->s390_ucontrol.trans_exc_code =
3089                                                 current->thread.gmap_addr;
3090                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3091                 return -EREMOTE;
3092         } else if (current->thread.gmap_pfault) {
3093                 trace_kvm_s390_major_guest_pfault(vcpu);
3094                 current->thread.gmap_pfault = 0;
3095                 if (kvm_arch_setup_async_pf(vcpu))
3096                         return 0;
3097                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3098         }
3099         return vcpu_post_run_fault_in_sie(vcpu);
3100 }
3101
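/*
 * Main run loop: keep entering SIE, with interrupts disabled around
 * guest_enter/guest_exit, until a signal is pending, guest debugging
 * requests an exit, or pre/post processing returns a non-zero value
 * (error or exit to userspace).
 */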
3102 static int __vcpu_run(struct kvm_vcpu *vcpu)
3103 {
3104         int rc, exit_reason;
3105
3106         /*
3107          * We try to hold kvm->srcu during most of vcpu_run (except when
3108          * running the guest), so that memslots (and other structures) are protected
3109          */
3110         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3111
3112         do {
3113                 rc = vcpu_pre_run(vcpu);
3114                 if (rc)
3115                         break;
3116
3117                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3118                 /*
3119                  * As PF_VCPU will be used in the fault handler, there must
3120                  * be no uaccess between guest_enter and guest_exit.
3121                  */
3122                 local_irq_disable();
3123                 guest_enter_irqoff();
3124                 __disable_cpu_timer_accounting(vcpu);
3125                 local_irq_enable();
3126                 exit_reason = sie64a(vcpu->arch.sie_block,
3127                                      vcpu->run->s.regs.gprs);
3128                 local_irq_disable();
3129                 __enable_cpu_timer_accounting(vcpu);
3130                 guest_exit_irqoff();
3131                 local_irq_enable();
3132                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3133
3134                 rc = vcpu_post_run(vcpu, exit_reason);
3135         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3136
3137         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3138         return rc;
3139 }
3140
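/*
 * Copy the register state that userspace marked dirty in struct kvm_run into
 * the VCPU: PSW, prefix, control registers (with a TLB flush where needed),
 * cpu timer, clock comparator and pfault handshake parameters. Runtime
 * instrumentation and guarded storage are enabled eagerly if userspace
 * provided valid control blocks, and the host FPU, access and guarded-storage
 * registers are saved before the guest versions are loaded.
 */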
3141 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3142 {
3143         struct runtime_instr_cb *riccb;
3144         struct gs_cb *gscb;
3145
3146         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3147         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3148         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3149         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3150         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3151                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3152         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3153                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3154                 /* some control register changes require a tlb flush */
3155                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3156         }
3157         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3158                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3159                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3160                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3161                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3162                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3163         }
3164         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3165                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3166                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3167                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3168                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3169                         kvm_clear_async_pf_completion_queue(vcpu);
3170         }
3171         /*
3172          * If userspace sets the riccb (e.g. after migration) to a valid state,
3173          * we should enable RI here instead of doing the lazy enablement.
3174          */
3175         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3176             test_kvm_facility(vcpu->kvm, 64) &&
3177             riccb->valid &&
3178             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3179                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3180                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3181         }
3182         /*
3183          * If userspace sets the gscb (e.g. after migration) to non-zero,
3184          * we should enable GS here instead of doing the lazy enablement.
3185          */
3186         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3187             test_kvm_facility(vcpu->kvm, 133) &&
3188             gscb->gssm &&
3189             !vcpu->arch.gs_enabled) {
3190                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3191                 vcpu->arch.sie_block->ecb |= ECB_GS;
3192                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3193                 vcpu->arch.gs_enabled = 1;
3194         }
3195         save_access_regs(vcpu->arch.host_acrs);
3196         restore_access_regs(vcpu->run->s.regs.acrs);
3197         /* save host (userspace) fprs/vrs */
3198         save_fpu_regs();
3199         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3200         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3201         if (MACHINE_HAS_VX)
3202                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3203         else
3204                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3205         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3206         if (test_fp_ctl(current->thread.fpu.fpc))
3207                 /* User space provided an invalid FPC, let's clear it */
3208                 current->thread.fpu.fpc = 0;
3209         if (MACHINE_HAS_GS) {
3210                 preempt_disable();
3211                 __ctl_set_bit(2, 4);
3212                 if (current->thread.gs_cb) {
3213                         vcpu->arch.host_gscb = current->thread.gs_cb;
3214                         save_gs_cb(vcpu->arch.host_gscb);
3215                 }
3216                 if (vcpu->arch.gs_enabled) {
3217                         current->thread.gs_cb = (struct gs_cb *)
3218                                                 &vcpu->run->s.regs.gscb;
3219                         restore_gs_cb(current->thread.gs_cb);
3220                 }
3221                 preempt_enable();
3222         }
3223
3224         kvm_run->kvm_dirty_regs = 0;
3225 }
3226
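/*
 * Counterpart of sync_regs: copy the current guest register state back into
 * struct kvm_run so userspace sees it after KVM_RUN returns.
 */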
3227 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3228 {
3229         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3230         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;