/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

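/*
 * Exit and instruction counters, keyed by the strings below. KVM common
 * code exports each entry read-only via debugfs, so on a host with
 * debugfs mounted at the usual location they can be inspected with e.g.
 * "cat /sys/kernel/debug/kvm/exit_null" (the mount point may differ).
 */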
struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
        { "instruction_stsch", VCPU_STAT(instruction_stsch) },
        { "instruction_chsc", VCPU_STAT(instruction_chsc) },
        { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
        { "diagnose_10", VCPU_STAT(diagnose_10) },
        { "diagnose_44", VCPU_STAT(diagnose_44) },
        { "diagnose_9c", VCPU_STAT(diagnose_9c) },
        { NULL }
};

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
        0xff82fffbf4fc2000UL,
        0x005c000000000000UL,
};

unsigned long kvm_s390_fac_list_mask_size(void)
{
        BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
        return ARRAY_SIZE(kvm_s390_fac_list_mask);
}

static struct gmap_notifier gmap_notifier;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
        /* every s390 is virtualization enabled ;-) */
        return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);

int kvm_arch_hardware_setup(void)
{
        gmap_notifier.notifier_call = kvm_gmap_notifier;
        gmap_register_ipte_notifier(&gmap_notifier);
        return 0;
}

void kvm_arch_hardware_unsetup(void)
{
        gmap_unregister_ipte_notifier(&gmap_notifier);
}

int kvm_arch_init(void *opaque)
{
        /* Register floating interrupt controller interface. */
        return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
{
        if (ioctl == KVM_S390_ENABLE_SIE)
                return s390_enable_sie();
        return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
        int r;

        switch (ext) {
        case KVM_CAP_S390_PSW:
        case KVM_CAP_S390_GMAP:
        case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_CAP_S390_UCONTROL:
#endif
        case KVM_CAP_ASYNC_PF:
        case KVM_CAP_SYNC_REGS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
        case KVM_CAP_IRQFD:
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_ENABLE_CAP_VM:
        case KVM_CAP_S390_IRQCHIP:
        case KVM_CAP_VM_ATTRIBUTES:
        case KVM_CAP_MP_STATE:
        case KVM_CAP_S390_USER_SIGP:
                r = 1;
                break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
                r = KVM_MAX_VCPUS;
                break;
        case KVM_CAP_NR_MEMSLOTS:
                r = KVM_USER_MEM_SLOTS;
                break;
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
                break;
        default:
                r = 0;
        }
        return r;
}
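
/*
 * A minimal sketch of how userspace consults the function above,
 * assuming an already created VM file descriptor "vm_fd"; the capability
 * constants come from <linux/kvm.h>:
 *
 *	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_USER_SIGP) > 0)
 *		... handle SIGP orders in userspace ...
 */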

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
                                        struct kvm_memory_slot *memslot)
{
        gfn_t cur_gfn, last_gfn;
        unsigned long address;
        struct gmap *gmap = kvm->arch.gmap;

        down_read(&gmap->mm->mmap_sem);
        /* Loop over all guest pages */
        last_gfn = memslot->base_gfn + memslot->npages;
        /* last_gfn is one past the last page of the slot, hence "<" */
        for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
                address = gfn_to_hva_memslot(memslot, cur_gfn);

                if (gmap_test_and_clear_dirty(address, gmap))
                        mark_page_dirty(kvm, cur_gfn);
        }
        up_read(&gmap->mm->mmap_sem);
}

/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
{
        int r;
        unsigned long n;
        struct kvm_memory_slot *memslot;
        int is_dirty = 0;

        mutex_lock(&kvm->slots_lock);

        r = -EINVAL;
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;

        memslot = id_to_memslot(kvm->memslots, log->slot);
        r = -ENOENT;
        if (!memslot->dirty_bitmap)
                goto out;

        kvm_s390_sync_dirty_log(kvm, memslot);
        r = kvm_get_dirty_log(kvm, log, &is_dirty);
        if (r)
                goto out;

        /* Clear the dirty log */
        if (is_dirty) {
                n = kvm_dirty_bitmap_bytes(memslot);
                memset(memslot->dirty_bitmap, 0, n);
        }
        r = 0;
out:
        mutex_unlock(&kvm->slots_lock);
        return r;
}
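
/*
 * A minimal userspace sketch of the ioctl served above, assuming an
 * existing VM fd "vm_fd" and a slot 0 with "npages" pages; the bitmap
 * layout (one bit per guest page) is defined by the KVM API:
 *
 *	struct kvm_dirty_log log = { .slot = 0 };
 *	unsigned long *bitmap = calloc((npages + 63) / 64, 8);
 *
 *	log.dirty_bitmap = bitmap;
 *	if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) == 0)
 *		... bit n set means guest page n of the slot was written ...
 */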

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_IRQCHIP:
                kvm->arch.use_irqchip = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_SIGP:
                kvm->arch.user_sigp = 1;
                r = 0;
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_LIMIT_SIZE:
                ret = 0;
                if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
                        ret = -EFAULT;
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;
        unsigned int idx;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_ENABLE_CMMA:
                ret = -EBUSY;
                mutex_lock(&kvm->lock);
                if (atomic_read(&kvm->online_vcpus) == 0) {
                        kvm->arch.use_cmma = 1;
                        ret = 0;
                }
                mutex_unlock(&kvm->lock);
                break;
        case KVM_S390_VM_MEM_CLR_CMMA:
                mutex_lock(&kvm->lock);
                idx = srcu_read_lock(&kvm->srcu);
                s390_reset_cmma(kvm->arch.gmap->mm);
                srcu_read_unlock(&kvm->srcu, idx);
                mutex_unlock(&kvm->lock);
                ret = 0;
                break;
        case KVM_S390_VM_MEM_LIMIT_SIZE: {
                unsigned long new_limit;

                if (kvm_is_ucontrol(kvm))
                        return -EINVAL;

                if (get_user(new_limit, (u64 __user *)attr->addr))
                        return -EFAULT;

                if (new_limit > kvm->arch.gmap->asce_end)
                        return -E2BIG;

                ret = -EBUSY;
                mutex_lock(&kvm->lock);
                if (atomic_read(&kvm->online_vcpus) == 0) {
                        /* gmap_alloc will round the limit up */
                        struct gmap *new = gmap_alloc(current->mm, new_limit);

                        if (!new) {
                                ret = -ENOMEM;
                        } else {
                                gmap_free(kvm->arch.gmap);
                                new->private = kvm;
                                kvm->arch.gmap = new;
                                ret = 0;
                        }
                }
                mutex_unlock(&kvm->lock);
                break;
        }
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}
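
/*
 * A minimal sketch of how userspace drives the handler above, assuming
 * a VM fd "vm_fd" and the attribute constants from <linux/kvm.h>; per
 * the code above, the limit (here 2 GB) can only be lowered and only
 * while no VCPU exists yet:
 *
 *	__u64 limit = 1UL << 31;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64) &limit,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */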

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_vcpu *vcpu;
        int i;

        if (!test_kvm_facility(kvm, 76))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        switch (attr->attr) {
        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                get_random_bytes(
                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                kvm->arch.crypto.aes_kw = 1;
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                get_random_bytes(
                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                kvm->arch.crypto.dea_kw = 1;
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                kvm->arch.crypto.aes_kw = 0;
                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                kvm->arch.crypto.dea_kw = 0;
                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                break;
        default:
                mutex_unlock(&kvm->lock);
                return -ENXIO;
        }

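        /*
         * Kick every VCPU out of SIE so that it observes the updated
         * wrapping-key masks and crypto controls on reentry.
         */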
        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_vcpu_crypto_setup(vcpu);
                exit_sie(vcpu);
        }
        mutex_unlock(&kvm->lock);
        return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high;

        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
                                           sizeof(gtod_high)))
                return -EFAULT;

        if (gtod_high != 0)
                return -EINVAL;

        return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_vcpu *cur_vcpu;
        unsigned int vcpu_idx;
        u64 host_tod, gtod;
        int r;

        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;

        r = store_tod_clock(&host_tod);
        if (r)
                return r;

        mutex_lock(&kvm->lock);
        kvm->arch.epoch = gtod - host_tod;
        kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm) {
                cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
                exit_sie(cur_vcpu);
        }
        mutex_unlock(&kvm->lock);
        return 0;
}
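
/*
 * The epoch is the signed difference between guest and host TOD, so the
 * guest TOD can be reconstructed at any time as host_tod + epoch (see
 * kvm_s390_get_tod_low below). For example, with host_tod = 0x1000 and
 * a requested gtod = 0x0400, the epoch becomes -0xc00, and
 * host_tod + epoch yields 0x0400 again.
 */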

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_set_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_set_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high = 0;

        if (copy_to_user((void __user *)attr->addr, &gtod_high,
                                         sizeof(gtod_high)))
                return -EFAULT;

        return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 host_tod, gtod;
        int r;

        r = store_tod_clock(&host_tod);
        if (r)
                return r;

        gtod = host_tod + kvm->arch.epoch;
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;

        return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_get_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_get_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        int ret = 0;

        mutex_lock(&kvm->lock);
        if (atomic_read(&kvm->online_vcpus)) {
                ret = -EBUSY;
                goto out;
        }
        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        if (!copy_from_user(proc, (void __user *)attr->addr,
                            sizeof(*proc))) {
                memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
                       sizeof(struct cpuid));
                kvm->arch.model.ibc = proc->ibc;
                memcpy(kvm->arch.model.fac->list, proc->fac_list,
                       S390_ARCH_FAC_LIST_SIZE_BYTE);
        } else
                ret = -EFAULT;
        kfree(proc);
out:
        mutex_unlock(&kvm->lock);
        return ret;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret = -ENXIO;

        switch (attr->attr) {
        case KVM_S390_VM_CPU_PROCESSOR:
                ret = kvm_s390_set_processor(kvm, attr);
                break;
        }
        return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        int ret = 0;

        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
        proc->ibc = kvm->arch.model.ibc;
        memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
        if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
                ret = -EFAULT;
        kfree(proc);
out:
        return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_machine *mach;
        int ret = 0;

        mach = kzalloc(sizeof(*mach), GFP_KERNEL);
        if (!mach) {
                ret = -ENOMEM;
                goto out;
        }
        get_cpu_id((struct cpuid *) &mach->cpuid);
        mach->ibc = sclp_get_ibc();
        memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
                ret = -EFAULT;
        kfree(mach);
out:
        return ret;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret = -ENXIO;

        switch (attr->attr) {
        case KVM_S390_VM_CPU_PROCESSOR:
                ret = kvm_s390_get_processor(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE:
                ret = kvm_s390_get_machine(kvm, attr);
                break;
        }
        return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                ret = kvm_s390_set_mem_control(kvm, attr);
                break;
        case KVM_S390_VM_TOD:
                ret = kvm_s390_set_tod(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MODEL:
                ret = kvm_s390_set_cpu_model(kvm, attr);
                break;
        case KVM_S390_VM_CRYPTO:
                ret = kvm_s390_vm_set_crypto(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                ret = kvm_s390_get_mem_control(kvm, attr);
                break;
        case KVM_S390_VM_TOD:
                ret = kvm_s390_get_tod(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MODEL:
                ret = kvm_s390_get_cpu_model(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                switch (attr->attr) {
                case KVM_S390_VM_MEM_ENABLE_CMMA:
                case KVM_S390_VM_MEM_CLR_CMMA:
                case KVM_S390_VM_MEM_LIMIT_SIZE:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_TOD:
                switch (attr->attr) {
                case KVM_S390_VM_TOD_LOW:
                case KVM_S390_VM_TOD_HIGH:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_CPU_MODEL:
                switch (attr->attr) {
                case KVM_S390_VM_CPU_PROCESSOR:
                case KVM_S390_VM_CPU_MACHINE:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_CRYPTO:
                switch (attr->attr) {
                case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

long kvm_arch_vm_ioctl(struct file *filp,
                       unsigned int ioctl, unsigned long arg)
{
        struct kvm *kvm = filp->private_data;
        void __user *argp = (void __user *)arg;
        struct kvm_device_attr attr;
        int r;

        switch (ioctl) {
        case KVM_S390_INTERRUPT: {
                struct kvm_s390_interrupt s390int;

                r = -EFAULT;
                if (copy_from_user(&s390int, argp, sizeof(s390int)))
                        break;
                r = kvm_s390_inject_vm(kvm, &s390int);
                break;
        }
        case KVM_ENABLE_CAP: {
                struct kvm_enable_cap cap;

                r = -EFAULT;
                if (copy_from_user(&cap, argp, sizeof(cap)))
                        break;
                r = kvm_vm_ioctl_enable_cap(kvm, &cap);
                break;
        }
        case KVM_CREATE_IRQCHIP: {
                struct kvm_irq_routing_entry routing;

                r = -EINVAL;
                if (kvm->arch.use_irqchip) {
                        /* Set up dummy routing. */
                        memset(&routing, 0, sizeof(routing));
                        kvm_set_irq_routing(kvm, &routing, 0, 0);
                        r = 0;
                }
                break;
        }
        case KVM_SET_DEVICE_ATTR: {
                r = -EFAULT;
                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
                        break;
                r = kvm_s390_vm_set_attr(kvm, &attr);
                break;
        }
        case KVM_GET_DEVICE_ATTR: {
                r = -EFAULT;
                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
                        break;
                r = kvm_s390_vm_get_attr(kvm, &attr);
                break;
        }
        case KVM_HAS_DEVICE_ATTR: {
                r = -EFAULT;
                if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
                        break;
                r = kvm_s390_vm_has_attr(kvm, &attr);
                break;
        }
        default:
                r = -ENOTTY;
        }

        return r;
}

static int kvm_s390_query_ap_config(u8 *config)
{
        u32 fcn_code = 0x04000000UL;
        u32 cc = 0;

        memset(config, 0, 128);
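        /*
         * PQAP with the QCI function code: the function code is loaded
         * into GR0 and the address of the 128-byte config block into
         * GR2 before issuing the instruction (opcode 0xb2af). The
         * condition code is extracted via ipm/srl; the exception table
         * entry turns a program check (e.g. QCI not available) into a
         * plain return, leaving the initial cc of 0 and an all-zero
         * config block.
         */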
        asm volatile(
                "lgr 0,%1\n"
                "lgr 2,%2\n"
                ".long 0xb2af0000\n"            /* PQAP(QCI) */
                "0: ipm %0\n"
                "srl %0,28\n"
                "1:\n"
                EX_TABLE(0b, 1b)
                : "+r" (cc)
                : "r" (fcn_code), "r" (config)
                : "cc", "0", "2", "memory"
        );

        return cc;
}

static int kvm_s390_apxa_installed(void)
{
        u8 config[128];
        int cc;

        if (test_facility(2) && test_facility(12)) {
                cc = kvm_s390_query_ap_config(config);

                if (cc)
                        pr_err("PQAP(QCI) failed with cc=%d", cc);
                else
                        return config[0] & 0x40;
        }

        return 0;
}

static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
        kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

        if (kvm_s390_apxa_installed())
                kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
        else
                kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
{
        get_cpu_id(cpu_id);
        cpu_id->version = 0xff;
}

static int kvm_s390_crypto_init(struct kvm *kvm)
{
        if (!test_kvm_facility(kvm, 76))
                return 0;

        kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
                                         GFP_KERNEL | GFP_DMA);
        if (!kvm->arch.crypto.crycb)
                return -ENOMEM;

        kvm_s390_set_crycb_format(kvm);

        /* Enable AES/DEA protected key functions by default */
        kvm->arch.crypto.aes_kw = 1;
        kvm->arch.crypto.dea_kw = 1;
        get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
        get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));

        return 0;
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
        int i, rc;
        char debug_name[16];
        static unsigned long sca_offset;

        rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
        if (type & ~KVM_VM_S390_UCONTROL)
                goto out_err;
        if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
                goto out_err;
#else
        if (type)
                goto out_err;
#endif

        rc = s390_enable_sie();
        if (rc)
                goto out_err;

        rc = -ENOMEM;

        kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
        if (!kvm->arch.sca)
                goto out_err;
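        /*
         * Stagger the start of each VM's SCA within its page (wrapping
         * at 0x7f0 so the block never crosses the page boundary),
         * presumably to spread the SCAs of different VMs across cache
         * lines instead of having them all start page aligned.
         */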
        spin_lock(&kvm_lock);
        sca_offset = (sca_offset + 16) & 0x7f0;
        kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
        spin_unlock(&kvm_lock);

        sprintf(debug_name, "kvm-%u", current->pid);

        kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
        if (!kvm->arch.dbf)
                goto out_nodbf;

        /*
         * The architectural maximum number of facility bits is 16 kbit. To
         * store this amount, 2 kbyte of memory is required. Thus we need a
         * full page to hold the guest facility list (arch.model.fac->list)
         * and the facility mask (arch.model.fac->mask). The page's address
         * has to fit into 31 bits and be word aligned.
         */
        kvm->arch.model.fac =
                (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
        if (!kvm->arch.model.fac)
                goto out_nofac;

        /* Populate the facility mask initially. */
        memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
                if (i < kvm_s390_fac_list_mask_size())
                        kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
                else
                        kvm->arch.model.fac->mask[i] = 0UL;
        }

        /* Populate the facility list initially. */
        memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
               S390_ARCH_FAC_LIST_SIZE_BYTE);

        kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
        kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff;

        if (kvm_s390_crypto_init(kvm) < 0)
                goto out_crypto;

        spin_lock_init(&kvm->arch.float_int.lock);
        INIT_LIST_HEAD(&kvm->arch.float_int.list);
        init_waitqueue_head(&kvm->arch.ipte_wq);
        mutex_init(&kvm->arch.ipte_mutex);

        debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
        VM_EVENT(kvm, 3, "%s", "vm created");

        if (type & KVM_VM_S390_UCONTROL) {
                kvm->arch.gmap = NULL;
        } else {
                kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
                if (!kvm->arch.gmap)
                        goto out_nogmap;
                kvm->arch.gmap->private = kvm;
                kvm->arch.gmap->pfault_enabled = 0;
        }

        kvm->arch.css_support = 0;
        kvm->arch.use_irqchip = 0;
        kvm->arch.epoch = 0;

        spin_lock_init(&kvm->arch.start_stop_lock);

        return 0;
out_nogmap:
        kfree(kvm->arch.crypto.crycb);
out_crypto:
        free_page((unsigned long)kvm->arch.model.fac);
out_nofac:
        debug_unregister(kvm->arch.dbf);
out_nodbf:
        free_page((unsigned long)(kvm->arch.sca));
out_err:
        return rc;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
        VCPU_EVENT(vcpu, 3, "%s", "free cpu");
        trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
        kvm_s390_clear_local_irqs(vcpu);
        kvm_clear_async_pf_completion_queue(vcpu);
        if (!kvm_is_ucontrol(vcpu->kvm)) {
                clear_bit(63 - vcpu->vcpu_id,
                          (unsigned long *) &vcpu->kvm->arch.sca->mcn);
                if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
                    (__u64) vcpu->arch.sie_block)
                        vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
        }
        smp_mb();

        if (kvm_is_ucontrol(vcpu->kvm))
                gmap_free(vcpu->arch.gmap);

        if (kvm_s390_cmma_enabled(vcpu->kvm))
                kvm_s390_vcpu_unsetup_cmma(vcpu);
        free_page((unsigned long)(vcpu->arch.sie_block));

        kvm_vcpu_uninit(vcpu);
        kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
        unsigned int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm)
                kvm_arch_vcpu_destroy(vcpu);

        mutex_lock(&kvm->lock);
        for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
                kvm->vcpus[i] = NULL;

        atomic_set(&kvm->online_vcpus, 0);
        mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
        kvm_free_vcpus(kvm);
        free_page((unsigned long)kvm->arch.model.fac);
        free_page((unsigned long)(kvm->arch.sca));
        debug_unregister(kvm->arch.dbf);
        kfree(kvm->arch.crypto.crycb);
        if (!kvm_is_ucontrol(kvm))
                gmap_free(kvm->arch.gmap);
        kvm_s390_destroy_adapters(kvm);
        kvm_s390_clear_float_irqs(kvm);
}

/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
        vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
        if (!vcpu->arch.gmap)
                return -ENOMEM;
        vcpu->arch.gmap->private = vcpu->kvm;

        return 0;
}

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
        kvm_clear_async_pf_completion_queue(vcpu);
        vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
                                    KVM_SYNC_GPRS |
                                    KVM_SYNC_ACRS |
                                    KVM_SYNC_CRS |
                                    KVM_SYNC_ARCH0 |
                                    KVM_SYNC_PFAULT;

        if (kvm_is_ucontrol(vcpu->kvm))
                return __kvm_ucontrol_vcpu_init(vcpu);

        return 0;
}

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
        save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
        save_fp_regs(vcpu->arch.host_fpregs.fprs);
        save_access_regs(vcpu->arch.host_acrs);
        restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
        restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
        restore_access_regs(vcpu->run->s.regs.acrs);
        gmap_enable(vcpu->arch.gmap);
        atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
        atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
        gmap_disable(vcpu->arch.gmap);
        save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
        save_fp_regs(vcpu->arch.guest_fpregs.fprs);
        save_access_regs(vcpu->run->s.regs.acrs);
        restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
        restore_fp_regs(vcpu->arch.host_fpregs.fprs);
        restore_access_regs(vcpu->arch.host_acrs);
}

static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
        /* this equals the initial cpu reset in POP, but we don't switch to ESA */
        vcpu->arch.sie_block->gpsw.mask = 0UL;
        vcpu->arch.sie_block->gpsw.addr = 0UL;
        kvm_s390_set_prefix(vcpu, 0);
        vcpu->arch.sie_block->cputm     = 0UL;
        vcpu->arch.sie_block->ckc       = 0UL;
        vcpu->arch.sie_block->todpr     = 0;
        memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
        vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
        vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
        vcpu->arch.guest_fpregs.fpc = 0;
        asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
        vcpu->arch.sie_block->gbea = 1;
        vcpu->arch.sie_block->pp = 0;
        vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
        kvm_clear_async_pf_completion_queue(vcpu);
        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
                kvm_s390_vcpu_stop(vcpu);
        kvm_s390_clear_local_irqs(vcpu);
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
        mutex_lock(&vcpu->kvm->lock);
        vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
        mutex_unlock(&vcpu->kvm->lock);
        if (!kvm_is_ucontrol(vcpu->kvm))
                vcpu->arch.gmap = vcpu->kvm->arch.gmap;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
        if (!test_kvm_facility(vcpu->kvm, 76))
                return;

        vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

        if (vcpu->kvm->arch.crypto.aes_kw)
                vcpu->arch.sie_block->ecb3 |= ECB3_AES;
        if (vcpu->kvm->arch.crypto.dea_kw)
                vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

        vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
        free_page(vcpu->arch.sie_block->cbrlo);
        vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
        vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
        if (!vcpu->arch.sie_block->cbrlo)
                return -ENOMEM;

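        /*
         * Raw SIE control block bits: 0x80 in ecb2 enables the CMMA
         * assist, which interprets ESSA in hardware and logs affected
         * blocks into the cbrlo page allocated above; clearing 0x08
         * presumably keeps PFMF intercepted by the host while CMMA is
         * in use.
         */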
        vcpu->arch.sie_block->ecb2 |= 0x80;
        vcpu->arch.sie_block->ecb2 &= ~0x08;
        return 0;
}

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
        int rc = 0;

        atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
                                                    CPUSTAT_SM |
                                                    CPUSTAT_STOPPED |
                                                    CPUSTAT_GED);
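        /*
         * The ecb/ecb2/eca values below are raw SIE control block bits.
         * Bit 0x10 in ecb enables transactional execution for the
         * guest; it is only set when both the constrained- and the base
         * transactional-execution facilities (50 and 73) are offered to
         * the guest.
         */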
        vcpu->arch.sie_block->ecb   = 6;
        if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
                vcpu->arch.sie_block->ecb |= 0x10;

        vcpu->arch.sie_block->ecb2  = 8;
        vcpu->arch.sie_block->eca   = 0xC1002000U;
        if (sclp_has_siif())
                vcpu->arch.sie_block->eca |= 1;
        if (sclp_has_sigpif())
                vcpu->arch.sie_block->eca |= 0x10000000U;
        vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE |
                                      ICTL_TPROT;

        if (kvm_s390_cmma_enabled(vcpu->kvm)) {
                rc = kvm_s390_vcpu_setup_cmma(vcpu);
                if (rc)
                        return rc;
        }
        hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

        mutex_lock(&vcpu->kvm->lock);
        vcpu->arch.cpu_id = vcpu->kvm->arch.model.cpu_id;
        vcpu->arch.sie_block->ibc = vcpu->kvm->arch.model.ibc;
        mutex_unlock(&vcpu->kvm->lock);

        kvm_s390_vcpu_crypto_setup(vcpu);

        return rc;
}

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
                                      unsigned int id)
{
        struct kvm_vcpu *vcpu;
        struct sie_page *sie_page;
        int rc = -EINVAL;

        if (id >= KVM_MAX_VCPUS)
                goto out;

        rc = -ENOMEM;

        vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
        if (!vcpu)
                goto out;

        sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
        if (!sie_page)
                goto out_free_cpu;

        vcpu->arch.sie_block = &sie_page->sie_block;
        vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

        vcpu->arch.sie_block->icpua = id;
        if (!kvm_is_ucontrol(kvm)) {
                if (!kvm->arch.sca) {
                        WARN_ON_ONCE(1);
                        goto out_free_cpu;
                }
                if (!kvm->arch.sca->cpu[id].sda)
                        kvm->arch.sca->cpu[id].sda =
                                (__u64) vcpu->arch.sie_block;
                vcpu->arch.sie_block->scaoh =
                        (__u32)(((__u64)kvm->arch.sca) >> 32);
                vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
                set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
        }
        vcpu->arch.sie_block->fac = (int) (long) kvm->arch.model.fac->list;

        spin_lock_init(&vcpu->arch.local_int.lock);
        vcpu->arch.local_int.float_int = &kvm->arch.float_int;
        vcpu->arch.local_int.wq = &vcpu->wq;
        vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;

        rc = kvm_vcpu_init(vcpu, kvm, id);
        if (rc)
                goto out_free_sie_block;
        VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
                 vcpu->arch.sie_block);
        trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

        return vcpu;
out_free_sie_block:
        free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
        kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
        return ERR_PTR(rc);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
        return kvm_s390_vcpu_has_irq(vcpu, 0);
}

void s390_vcpu_block(struct kvm_vcpu *vcpu)
{
        atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
        atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
        atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
        while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
                cpu_relax();
}

/* Kick a guest cpu out of SIE and prevent SIE-reentry */
void exit_sie_sync(struct kvm_vcpu *vcpu)
{
        s390_vcpu_block(vcpu);
        exit_sie(vcpu);
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
{
        int i;
        struct kvm *kvm = gmap->private;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                /* match against both prefix pages */
                if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
                        VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
                        kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
                        exit_sie_sync(vcpu);
                }
        }
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
        /* kvm common code refers to this, but never calls it */
        BUG();
        return 0;
}

static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
                                           struct kvm_one_reg *reg)
{
        int r = -EINVAL;

        switch (reg->id) {
        case KVM_REG_S390_TODPR:
                r = put_user(vcpu->arch.sie_block->todpr,
                             (u32 __user *)reg->addr);
                break;
        case KVM_REG_S390_EPOCHDIFF:
                r = put_user(vcpu->arch.sie_block->epoch,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_CPU_TIMER:
                r = put_user(vcpu->arch.sie_block->cputm,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_CLOCK_COMP:
                r = put_user(vcpu->arch.sie_block->ckc,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PFTOKEN:
                r = put_user(vcpu->arch.pfault_token,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PFCOMPARE:
                r = put_user(vcpu->arch.pfault_compare,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PFSELECT:
                r = put_user(vcpu->arch.pfault_select,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PP:
                r = put_user(vcpu->arch.sie_block->pp,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_GBEA:
                r = put_user(vcpu->arch.sie_block->gbea,
                             (u64 __user *)reg->addr);
                break;
        default:
                break;
        }

        return r;
}

static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
                                           struct kvm_one_reg *reg)
{
        int r = -EINVAL;

        switch (reg->id) {
        case KVM_REG_S390_TODPR:
                r = get_user(vcpu->arch.sie_block->todpr,
                             (u32 __user *)reg->addr);
                break;
        case KVM_REG_S390_EPOCHDIFF:
                r = get_user(vcpu->arch.sie_block->epoch,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_CPU_TIMER:
                r = get_user(vcpu->arch.sie_block->cputm,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_CLOCK_COMP:
                r = get_user(vcpu->arch.sie_block->ckc,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PFTOKEN:
                r = get_user(vcpu->arch.pfault_token,
                             (u64 __user *)reg->addr);
                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
                        kvm_clear_async_pf_completion_queue(vcpu);
                break;
        case KVM_REG_S390_PFCOMPARE:
                r = get_user(vcpu->arch.pfault_compare,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PFSELECT:
                r = get_user(vcpu->arch.pfault_select,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_PP:
                r = get_user(vcpu->arch.sie_block->pp,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_GBEA:
                r = get_user(vcpu->arch.sie_block->gbea,
                             (u64 __user *)reg->addr);
                break;
        default:
                break;
        }

        return r;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
        kvm_s390_vcpu_initial_reset(vcpu);
        return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
        memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
        return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
        memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
        return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
        memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
        restore_access_regs(vcpu->run->s.regs.acrs);
        return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
        memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
        return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
        if (test_fp_ctl(fpu->fpc))
                return -EINVAL;
        memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
        vcpu->arch.guest_fpregs.fpc = fpu->fpc;
        restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
        restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
        return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
        memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
        fpu->fpc = vcpu->arch.guest_fpregs.fpc;
        return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
        int rc = 0;

        if (!is_vcpu_stopped(vcpu))
                rc = -EBUSY;
        else {
                vcpu->run->psw_mask = psw.mask;
                vcpu->run->psw_addr = psw.addr;
        }
        return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
                                  struct kvm_translation *tr)
{
        return -EINVAL; /* not implemented yet */
}

#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
                              KVM_GUESTDBG_USE_HW_BP | \
                              KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
                                        struct kvm_guest_debug *dbg)
{
        int rc = 0;

        vcpu->guest_debug = 0;
        kvm_s390_clear_bp_data(vcpu);

        if (dbg->control & ~VALID_GUESTDBG_FLAGS)
                return -EINVAL;

        if (dbg->control & KVM_GUESTDBG_ENABLE) {
                vcpu->guest_debug = dbg->control;
                /* enforce guest PER */
                atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

                if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
                        rc = kvm_s390_import_bp_data(vcpu, dbg);
        } else {
                atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
                vcpu->arch.guestdbg.last_bp = 0;
        }

        if (rc) {
                vcpu->guest_debug = 0;
                kvm_s390_clear_bp_data(vcpu);
                atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
        }

        return rc;
}
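
/*
 * A minimal userspace sketch for the handler above, assuming a VCPU fd
 * "vcpu_fd"; enabling single-stepping needs only the control flags:
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 */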

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
{
        /* CHECK_STOP and LOAD are not supported yet */
        return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
                                       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
{
        int rc = 0;

        /* user space knows about this interface - let it control the state */
        vcpu->kvm->arch.user_cpu_state_ctrl = 1;

        switch (mp_state->mp_state) {
        case KVM_MP_STATE_STOPPED:
                kvm_s390_vcpu_stop(vcpu);
                break;
        case KVM_MP_STATE_OPERATING:
                kvm_s390_vcpu_start(vcpu);
                break;
        case KVM_MP_STATE_LOAD:
        case KVM_MP_STATE_CHECK_STOP:
                /* fall through - CHECK_STOP and LOAD are not supported yet */
        default:
                rc = -ENXIO;
        }

        return rc;
}

bool kvm_s390_cmma_enabled(struct kvm *kvm)
{
        if (!MACHINE_IS_LPAR)
                return false;
        /* only enable for z10 and later */
        if (!MACHINE_HAS_EDAT1)
                return false;
        if (!kvm->arch.use_cmma)
                return false;
        return true;
}

static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
        return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
        s390_vcpu_unblock(vcpu);
1550         /*
1551          * We use MMU_RELOAD just to re-arm the ipte notifier for the
1552          * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1553          * This ensures that the ipte instruction for this request has
1554          * already finished. We might race against a second unmapper that
1555          * wants to set the blocking bit. Let's just retry the request loop.
1556          */
1557         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1558                 int rc;
1559                 rc = gmap_ipte_notify(vcpu->arch.gmap,
1560                                       kvm_s390_get_prefix(vcpu),
1561                                       PAGE_SIZE * 2);
1562                 if (rc)
1563                         return rc;
1564                 goto retry;
1565         }
1566
1567         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1568                 vcpu->arch.sie_block->ihcpu = 0xffff;
1569                 goto retry;
1570         }
1571
1572         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1573                 if (!ibs_enabled(vcpu)) {
1574                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1575                         atomic_set_mask(CPUSTAT_IBS,
1576                                         &vcpu->arch.sie_block->cpuflags);
1577                 }
1578                 goto retry;
1579         }
1580
1581         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
1582                 if (ibs_enabled(vcpu)) {
1583                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
1584                         atomic_clear_mask(CPUSTAT_IBS,
1585                                           &vcpu->arch.sie_block->cpuflags);
1586                 }
1587                 goto retry;
1588         }
1589
1590         /* nothing to do, just clear the request */
1591         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
1592
1593         return 0;
1594 }
1595
1596 /**
1597  * kvm_arch_fault_in_page - fault-in guest page if necessary
1598  * @vcpu: The corresponding virtual cpu
1599  * @gpa: Guest physical address
1600  * @writable: Whether the page should be writable or not
1601  *
1602  * Make sure that a guest page has been faulted-in on the host.
1603  *
1604  * Return: Zero on success, negative error code otherwise.
1605  */
1606 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
1607 {
1608         return gmap_fault(vcpu->arch.gmap, gpa,
1609                           writable ? FAULT_FLAG_WRITE : 0);
1610 }
1611
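/*
 * Inject a pfault token into the guest: INIT notifications are injected
 * into the vcpu directly, DONE notifications are floating interrupts
 * injected into the VM.
 */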
1612 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
1613                                       unsigned long token)
1614 {
1615         struct kvm_s390_interrupt inti;
1616         struct kvm_s390_irq irq;
1617
1618         if (start_token) {
1619                 irq.u.ext.ext_params2 = token;
1620                 irq.type = KVM_S390_INT_PFAULT_INIT;
1621                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
1622         } else {
1623                 inti.type = KVM_S390_INT_PFAULT_DONE;
1624                 inti.parm64 = token;
1625                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
1626         }
1627 }
1628
1629 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
1630                                      struct kvm_async_pf *work)
1631 {
1632         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
1633         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
1634 }
1635
1636 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
1637                                  struct kvm_async_pf *work)
1638 {
1639         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
1640         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
1641 }
1642
1643 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
1644                                struct kvm_async_pf *work)
1645 {
1646         /* s390 will always inject the page directly */
1647 }
1648
1649 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
1650 {
1651         /*
1652          * s390 will always inject the page directly,
1653          * but we still want check_async_completion to clean up
1654          */
1655         return true;
1656 }
1657
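/*
 * Decide whether the host fault at current->thread.gmap_addr may be
 * handled as an asynchronous pfault: a valid pfault token must be set,
 * the PSW must match the pfault compare/select masks, external
 * interrupts must be enabled with the matching subclass mask in CR0
 * (0x200), no interrupt may be pending, and pfault handling must be
 * enabled for the gmap. Returns nonzero once an async pfault has been
 * queued; 0 means the caller has to resolve the fault synchronously.
 */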
1658 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
1659 {
1660         hva_t hva;
1661         struct kvm_arch_async_pf arch;
1662         int rc;
1663
1664         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1665                 return 0;
1666         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
1667             vcpu->arch.pfault_compare)
1668                 return 0;
1669         if (psw_extint_disabled(vcpu))
1670                 return 0;
1671         if (kvm_s390_vcpu_has_irq(vcpu, 0))
1672                 return 0;
1673         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
1674                 return 0;
1675         if (!vcpu->arch.gmap->pfault_enabled)
1676                 return 0;
1677
1678         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
1679         hva += current->thread.gmap_addr & ~PAGE_MASK;
1680         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
1681                 return 0;
1682
1683         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
1684         return rc;
1685 }
1686
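/*
 * Prepare for entering SIE: retire completed async pfaults, sync gprs
 * 14/15 into the SIE block, let the scheduler and the machine-check
 * handler run if needed, then deliver pending interrupts and requests.
 */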
1687 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
1688 {
1689         int rc, cpuflags;
1690
1691         /*
1692          * On s390, notifications for arriving pages will be delivered directly
1693          * to the guest, but the housekeeping for completed pfaults is
1694          * handled outside the worker.
1695          */
1696         kvm_check_async_pf_completion(vcpu);
1697
1698         memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
1699
1700         if (need_resched())
1701                 schedule();
1702
1703         if (test_cpu_flag(CIF_MCCK_PENDING))
1704                 s390_handle_mcck();
1705
1706         if (!kvm_is_ucontrol(vcpu->kvm)) {
1707                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
1708                 if (rc)
1709                         return rc;
1710         }
1711
1712         rc = kvm_s390_handle_requests(vcpu);
1713         if (rc)
1714                 return rc;
1715
1716         if (guestdbg_enabled(vcpu)) {
1717                 kvm_s390_backup_guest_per_regs(vcpu);
1718                 kvm_s390_patch_guest_per_regs(vcpu);
1719         }
1720
1721         vcpu->arch.sie_block->icptcode = 0;
1722         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
1723         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
1724         trace_kvm_s390_sie_enter(vcpu, cpuflags);
1725
1726         return 0;
1727 }
1728
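/*
 * Post-process a SIE exit. A negative exit_reason means the SIE
 * instruction itself faulted: ucontrol guests get the fault forwarded
 * to userspace, guest pfaults are resolved (asynchronously if possible)
 * and everything else becomes an addressing exception for the guest.
 */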
1729 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
1730 {
1731         int rc = -1;
1732
1733         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
1734                    vcpu->arch.sie_block->icptcode);
1735         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
1736
1737         if (guestdbg_enabled(vcpu))
1738                 kvm_s390_restore_guest_per_regs(vcpu);
1739
1740         if (exit_reason >= 0) {
1741                 rc = 0;
1742         } else if (kvm_is_ucontrol(vcpu->kvm)) {
1743                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
1744                 vcpu->run->s390_ucontrol.trans_exc_code =
1745                                                 current->thread.gmap_addr;
1746                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
1747                 rc = -EREMOTE;
1748
1749         } else if (current->thread.gmap_pfault) {
1750                 trace_kvm_s390_major_guest_pfault(vcpu);
1751                 current->thread.gmap_pfault = 0;
1752                 if (kvm_arch_setup_async_pf(vcpu)) {
1753                         rc = 0;
1754                 } else {
1755                         gpa_t gpa = current->thread.gmap_addr;
1756                         rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
1757                 }
1758         }
1759
1760         if (rc == -1) {
1761                 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
1762                 trace_kvm_s390_sie_fault(vcpu);
1763                 rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
1764         }
1765
1766         memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
1767
1768         if (rc == 0) {
1769                 if (kvm_is_ucontrol(vcpu->kvm))
1770                         /* Don't exit for host interrupts. */
1771                         rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
1772                 else
1773                         rc = kvm_handle_sie_intercept(vcpu);
1774         }
1775
1776         return rc;
1777 }
1778
1779 static int __vcpu_run(struct kvm_vcpu *vcpu)
1780 {
1781         int rc, exit_reason;
1782
1783         /*
1784          * We try to hold kvm->srcu during most of vcpu_run (except when
1785          * running the guest), so that memslots (and other stuff) are protected
1786          */
1787         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
1788
1789         do {
1790                 rc = vcpu_pre_run(vcpu);
1791                 if (rc)
1792                         break;
1793
1794                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
1795                 /*
1796                  * As PF_VCPU will be used in the fault handler, there must
1797                  * be no uaccess between guest_enter and guest_exit.
1798                  */
1799                 preempt_disable();
1800                 kvm_guest_enter();
1801                 preempt_enable();
1802                 exit_reason = sie64a(vcpu->arch.sie_block,
1803                                      vcpu->run->s.regs.gprs);
1804                 kvm_guest_exit();
1805                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
1806
1807                 rc = vcpu_post_run(vcpu, exit_reason);
1808         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
1809
1810         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
1811         return rc;
1812 }
1813
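/* Transfer the registers marked dirty in kvm_run into the SIE control block. */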
1814 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1815 {
1816         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
1817         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
1818         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
1819                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
1820         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
1821                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
1822                 /* some control register changes require a tlb flush */
1823                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
1824         }
1825         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
1826                 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
1827                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
1828                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
1829                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
1830                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
1831         }
1832         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
1833                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
1834                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
1835                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
1836                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1837                         kvm_clear_async_pf_completion_queue(vcpu);
1838         }
1839         kvm_run->kvm_dirty_regs = 0;
1840 }
1841
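/* Mirror the current SIE/VCPU register state back into kvm_run. */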
1842 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1843 {
1844         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
1845         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
1846         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
1847         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
1848         kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
1849         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
1850         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
1851         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
1852         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
1853         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
1854         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
1855         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
1856 }
1857
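/*
 * Handle the KVM_RUN vcpu ioctl: sync registers in, run the inner loop
 * and sync registers back out; unhandled intercepts are turned into
 * KVM_EXIT_S390_SIEIC exits carrying the raw intercept data.
 */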
1858 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1859 {
1860         int rc;
1861         sigset_t sigsaved;
1862
1863         if (guestdbg_exit_pending(vcpu)) {
1864                 kvm_s390_prepare_debug_exit(vcpu);
1865                 return 0;
1866         }
1867
1868         if (vcpu->sigset_active)
1869                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
1870
1871         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
1872                 kvm_s390_vcpu_start(vcpu);
1873         } else if (is_vcpu_stopped(vcpu)) {
1874                 pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n",
1875                                    vcpu->vcpu_id);
1876                 return -EINVAL;
1877         }
1878
1879         sync_regs(vcpu, kvm_run);
1880
1881         might_fault();
1882         rc = __vcpu_run(vcpu);
1883
1884         if (signal_pending(current) && !rc) {
1885                 kvm_run->exit_reason = KVM_EXIT_INTR;
1886                 rc = -EINTR;
1887         }
1888
1889         if (guestdbg_exit_pending(vcpu) && !rc)  {
1890                 kvm_s390_prepare_debug_exit(vcpu);
1891                 rc = 0;
1892         }
1893
1894         if (rc == -EOPNOTSUPP) {
1895                 /* intercept cannot be handled in-kernel, prepare kvm-run */
1896                 kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
1897                 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
1898                 kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
1899                 kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
1900                 rc = 0;
1901         }
1902
1903         if (rc == -EREMOTE) {
1904                 /* intercept was handled, but userspace support is needed;
1905                  * kvm_run has been prepared by the handler */
1906                 rc = 0;
1907         }
1908
1909         store_regs(vcpu, kvm_run);
1910
1911         if (vcpu->sigset_active)
1912                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1913
1914         vcpu->stat.exit_userspace++;
1915         return rc;
1916 }
1917
1918 /*
1919  * store status at address
1920  * we have two special cases:
1921  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
1922  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
1923  */
1924 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
1925 {
1926         unsigned char archmode = 1;
1927         unsigned int px;
1928         u64 clkcomp;
1929         int rc;
1930
1931         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
1932                 if (write_guest_abs(vcpu, 163, &archmode, 1))
1933                         return -EFAULT;
1934                 gpa = SAVE_AREA_BASE;
1935         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
1936                 if (write_guest_real(vcpu, 163, &archmode, 1))
1937                         return -EFAULT;
1938                 gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
1939         }
1940         rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
1941                              vcpu->arch.guest_fpregs.fprs, 128);
1942         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
1943                               vcpu->run->s.regs.gprs, 128);
1944         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
1945                               &vcpu->arch.sie_block->gpsw, 16);
1946         px = kvm_s390_get_prefix(vcpu);
1947         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
1948                               &px, 4);
1949         rc |= write_guest_abs(vcpu,
1950                               gpa + offsetof(struct save_area, fp_ctrl_reg),
1951                               &vcpu->arch.guest_fpregs.fpc, 4);
1952         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
1953                               &vcpu->arch.sie_block->todpr, 4);
1954         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
1955                               &vcpu->arch.sie_block->cputm, 8);
1956         clkcomp = vcpu->arch.sie_block->ckc >> 8;
1957         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
1958                               &clkcomp, 8);
1959         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
1960                               &vcpu->run->s.regs.acrs, 64);
1961         rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
1962                               &vcpu->arch.sie_block->gcr, 128);
1963         return rc ? -EFAULT : 0;
1964 }
1965
1966 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
1967 {
1968         /*
1969          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
1970          * copying in vcpu load/put. Let's update our copies before we save
1971          * them into the save area.
1972          */
1973         save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
1974         save_fp_regs(vcpu->arch.guest_fpregs.fprs);
1975         save_access_regs(vcpu->run->s.regs.acrs);
1976
1977         return kvm_s390_store_status_unloaded(vcpu, addr);
1978 }
1979
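/*
 * Helpers to toggle CPUSTAT_IBS. The opposite request is cancelled
 * first, then the new request is raised and the VCPU is kicked out of
 * SIE so the change takes effect.
 */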
1980 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
1981 {
1982         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
1983         kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
1984         exit_sie_sync(vcpu);
1985 }
1986
1987 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
1988 {
1989         unsigned int i;
1990         struct kvm_vcpu *vcpu;
1991
1992         kvm_for_each_vcpu(i, vcpu, kvm) {
1993                 __disable_ibs_on_vcpu(vcpu);
1994         }
1995 }
1996
1997 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
1998 {
1999         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2000         kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
2001         exit_sie_sync(vcpu);
2002 }
2003
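/*
 * Bring a VCPU out of the STOPPED state. Since IBS only pays off with a
 * single running VCPU, enable it if this VCPU becomes the only started
 * one and disable it on all VCPUs as soon as a second one is started.
 */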
2004 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2005 {
2006         int i, online_vcpus, started_vcpus = 0;
2007
2008         if (!is_vcpu_stopped(vcpu))
2009                 return;
2010
2011         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2012         /* Only one cpu at a time may enter/leave the STOPPED state. */
2013         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2014         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2015
2016         for (i = 0; i < online_vcpus; i++) {
2017                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2018                         started_vcpus++;
2019         }
2020
2021         if (started_vcpus == 0) {
2022                 /* we're the only active VCPU -> speed it up */
2023                 __enable_ibs_on_vcpu(vcpu);
2024         } else if (started_vcpus == 1) {
2025                 /*
2026                  * As we are starting a second VCPU, we have to disable
2027                  * the IBS facility on all VCPUs to remove potentially
2028                  * outstanding ENABLE requests.
2029                  */
2030                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2031         }
2032
2033         atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2034         /*
2035          * Another VCPU might have used IBS while we were offline.
2036          * Let's play safe and flush the VCPU at startup.
2037          */
2038         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2039         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2041 }
2042
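/*
 * Put a VCPU into the STOPPED state, discarding any pending stop irqs.
 * If exactly one started VCPU is left afterwards, re-enable IBS for it.
 */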
2043 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2044 {
2045         int i, online_vcpus, started_vcpus = 0;
2046         struct kvm_vcpu *started_vcpu = NULL;
2047
2048         if (is_vcpu_stopped(vcpu))
2049                 return;
2050
2051         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2052         /* Only one cpu at a time may enter/leave the STOPPED state. */
2053         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2054         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2055
2056         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2057         kvm_s390_clear_stop_irq(vcpu);
2058
2059         atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2060         __disable_ibs_on_vcpu(vcpu);
2061
2062         for (i = 0; i < online_vcpus; i++) {
2063                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2064                         started_vcpus++;
2065                         started_vcpu = vcpu->kvm->vcpus[i];
2066                 }
2067         }
2068
2069         if (started_vcpus == 1) {
2070                 /*
2071                  * As we only have one VCPU left, we want to enable the
2072                  * IBS facility for that VCPU to speed it up.
2073                  */
2074                 __enable_ibs_on_vcpu(started_vcpu);
2075         }
2076
2077         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2079 }
2080
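/*
 * Handle KVM_ENABLE_CAP on the vcpu fd; only KVM_CAP_S390_CSS_SUPPORT
 * can be enabled here, and no flags are accepted.
 *
 * Illustrative userspace call (hypothetical fd name, not from this file):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_CSS_SUPPORT };
 *	ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
 */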
2081 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2082                                      struct kvm_enable_cap *cap)
2083 {
2084         int r;
2085
2086         if (cap->flags)
2087                 return -EINVAL;
2088
2089         switch (cap->cap) {
2090         case KVM_CAP_S390_CSS_SUPPORT:
2091                 if (!vcpu->kvm->arch.css_support) {
2092                         vcpu->kvm->arch.css_support = 1;
2093                         trace_kvm_s390_enable_css(vcpu->kvm);
2094                 }
2095                 r = 0;
2096                 break;
2097         default:
2098                 r = -EINVAL;
2099                 break;
2100         }
2101         return r;
2102 }
2103
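/* Dispatch the s390 specific vcpu ioctls not handled in common code. */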
2104 long kvm_arch_vcpu_ioctl(struct file *filp,
2105                          unsigned int ioctl, unsigned long arg)
2106 {
2107         struct kvm_vcpu *vcpu = filp->private_data;
2108         void __user *argp = (void __user *)arg;
2109         int idx;
2110         long r;
2111
2112         switch (ioctl) {
2113         case KVM_S390_INTERRUPT: {
2114                 struct kvm_s390_interrupt s390int;
2115                 struct kvm_s390_irq s390irq;
2116
2117                 r = -EFAULT;
2118                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2119                         break;
2120                 if (s390int_to_s390irq(&s390int, &s390irq))
2121                         return -EINVAL;
2122                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2123                 break;
2124         }
2125         case KVM_S390_STORE_STATUS:
2126                 idx = srcu_read_lock(&vcpu->kvm->srcu);
2127                 r = kvm_s390_vcpu_store_status(vcpu, arg);
2128                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2129                 break;
2130         case KVM_S390_SET_INITIAL_PSW: {
2131                 psw_t psw;
2132
2133                 r = -EFAULT;
2134                 if (copy_from_user(&psw, argp, sizeof(psw)))
2135                         break;
2136                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2137                 break;
2138         }
2139         case KVM_S390_INITIAL_RESET:
2140                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2141                 break;
2142         case KVM_SET_ONE_REG:
2143         case KVM_GET_ONE_REG: {
2144                 struct kvm_one_reg reg;
2145                 r = -EFAULT;
2146                 if (copy_from_user(&reg, argp, sizeof(reg)))
2147                         break;
2148                 if (ioctl == KVM_SET_ONE_REG)
2149                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2150                 else
2151                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2152                 break;
2153         }
2154 #ifdef CONFIG_KVM_S390_UCONTROL
2155         case KVM_S390_UCAS_MAP: {
2156                 struct kvm_s390_ucas_mapping ucasmap;
2157
2158                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2159                         r = -EFAULT;
2160                         break;
2161                 }
2162
2163                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2164                         r = -EINVAL;
2165                         break;
2166                 }
2167
2168                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2169                                      ucasmap.vcpu_addr, ucasmap.length);
2170                 break;
2171         }
2172         case KVM_S390_UCAS_UNMAP: {
2173                 struct kvm_s390_ucas_mapping ucasmap;
2174
2175                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2176                         r = -EFAULT;
2177                         break;
2178                 }
2179
2180                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2181                         r = -EINVAL;
2182                         break;
2183                 }
2184
2185                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2186                         ucasmap.length);
2187                 break;
2188         }
2189 #endif
2190         case KVM_S390_VCPU_FAULT: {
2191                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
2192                 break;
2193         }
2194         case KVM_ENABLE_CAP:
2195         {
2196                 struct kvm_enable_cap cap;
2197                 r = -EFAULT;
2198                 if (copy_from_user(&cap, argp, sizeof(cap)))
2199                         break;
2200                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2201                 break;
2202         }
2203         default:
2204                 r = -ENOTTY;
2205         }
2206         return r;
2207 }
2208
2209 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2210 {
2211 #ifdef CONFIG_KVM_S390_UCONTROL
2212         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2213                  && (kvm_is_ucontrol(vcpu->kvm))) {
2214                 vmf->page = virt_to_page(vcpu->arch.sie_block);
2215                 get_page(vmf->page);
2216                 return 0;
2217         }
2218 #endif
2219         return VM_FAULT_SIGBUS;
2220 }
2221
2222 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2223                             unsigned long npages)
2224 {
2225         return 0;
2226 }
2227
2228 /* Section: memory related */
2229 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2230                                    struct kvm_memory_slot *memslot,
2231                                    struct kvm_userspace_memory_region *mem,
2232                                    enum kvm_mr_change change)
2233 {
2234         /* A few sanity checks. Memory slots have to start and end at a
2235            segment boundary (1 MB). The memory in userland may be fragmented
2236            into various different vmas. It is okay to mmap() and munmap()
2237            stuff in this slot after doing this call, at any time. */
2238
2239         if (mem->userspace_addr & 0xffffful)
2240                 return -EINVAL;
2241
2242         if (mem->memory_size & 0xffffful)
2243                 return -EINVAL;
2244
2245         return 0;
2246 }
2247
2248 void kvm_arch_commit_memory_region(struct kvm *kvm,
2249                                 struct kvm_userspace_memory_region *mem,
2250                                 const struct kvm_memory_slot *old,
2251                                 enum kvm_mr_change change)
2252 {
2253         int rc;
2254
2255         /* If the basics of the memslot do not change, we do not want
2256          * to update the gmap. Every update causes several unnecessary
2257          * segment translation exceptions. This is usually handled just
2258          * fine by the normal fault handler + gmap, but it will also
2259          * cause faults on the prefix page of running guest CPUs.
2260          */
2261         if (old->userspace_addr == mem->userspace_addr &&
2262             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2263             old->npages * PAGE_SIZE == mem->memory_size)
2264                 return;
2265
2266         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2267                 mem->guest_phys_addr, mem->memory_size);
2268         if (rc)
2269                 printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
2270         return;
2271 }
2272
2273 static int __init kvm_s390_init(void)
2274 {
2275         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2276 }
2277
2278 static void __exit kvm_s390_exit(void)
2279 {
2280         kvm_exit();
2281 }
2282
2283 module_init(kvm_s390_init);
2284 module_exit(kvm_s390_exit);
2285
2286 /*
2287  * Enable autoloading of the kvm module.
2288  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2289  * since x86 takes a different approach.
2290  */
2291 #include <linux/miscdevice.h>
2292 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2293 MODULE_ALIAS("devname:kvm");