arch/s390/kvm/kvm-s390.c

   1 /*
   2  * hosting zSeries kernel virtual machines
   3  *
   4  * Copyright IBM Corp. 2008, 2009
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License (version 2 only)
   8  * as published by the Free Software Foundation.
   9  *
  10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  11  *               Christian Borntraeger <borntraeger@de.ibm.com>
  12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  14  *               Jason J. Herne <jjherne@us.ibm.com>
  15  */
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/mman.h>
  25 #include <linux/module.h>
  26 #include <linux/random.h>
  27 #include <linux/slab.h>
  28 #include <linux/timer.h>
  29 #include <linux/vmalloc.h>
  30 #include <linux/bitmap.h>
  31 #include <asm/asm-offsets.h>
  32 #include <asm/lowcore.h>
  33 #include <asm/etr.h>
  34 #include <asm/pgtable.h>
  35 #include <asm/gmap.h>
  36 #include <asm/nmi.h>
  37 #include <asm/switch_to.h>
  38 #include <asm/isc.h>
  39 #include <asm/sclp.h>
  40 #include <asm/cpacf.h>
  41 #include <asm/etr.h>
  42 #include "kvm-s390.h"
  43 #include "gaccess.h"
  44
  45 #define KMSG_COMPONENT "kvm-s390"
  46 #undef pr_fmt
  47 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  48
  49 #define CREATE_TRACE_POINTS
  50 #include "trace.h"
  51 #include "trace-s390.h"
  52
  53 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  54 #define LOCAL_IRQS 32
  55 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  56                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  57
  58 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  59
  60 struct kvm_stats_debugfs_item debugfs_entries[] = {
  61         { "userspace_handled", VCPU_STAT(exit_userspace) },
  62         { "exit_null", VCPU_STAT(exit_null) },
  63         { "exit_validity", VCPU_STAT(exit_validity) },
  64         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  65         { "exit_external_request", VCPU_STAT(exit_external_request) },
  66         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  67         { "exit_instruction", VCPU_STAT(exit_instruction) },
  68         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  69         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  70         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
  71         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  72         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  73         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
  74         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  75         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  76         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  77         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  78         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  79         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  80         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  81         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  82         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
  83         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  84         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  85         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  86         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
  87         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  88         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
  89         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
  90         { "instruction_spx", VCPU_STAT(instruction_spx) },
  91         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
  92         { "instruction_stap", VCPU_STAT(instruction_stap) },
  93         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
  94         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
  95         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
  96         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
  97         { "instruction_essa", VCPU_STAT(instruction_essa) },
  98         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
  99         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
 100         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
 101         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
 102         { "instruction_sie", VCPU_STAT(instruction_sie) },
 103         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
 104         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 105         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 106         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 107         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
 108         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 109         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 110         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 111         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 112         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 113         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 114         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 115         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 116         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 117         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 118         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 119         { "diagnose_10", VCPU_STAT(diagnose_10) },
 120         { "diagnose_44", VCPU_STAT(diagnose_44) },
 121         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
 122         { "diagnose_258", VCPU_STAT(diagnose_258) },
 123         { "diagnose_308", VCPU_STAT(diagnose_308) },
 124         { "diagnose_500", VCPU_STAT(diagnose_500) },
 125         { NULL }
 126 };
 127
 128 /* allow nested virtualization in KVM (if enabled by user space) */
 129 static int nested;
 130 module_param(nested, int, S_IRUGO);
 131 MODULE_PARM_DESC(nested, "Nested virtualization support");
 132
 133 /* upper facilities limit for kvm */
 134 unsigned long kvm_s390_fac_list_mask[16] = {
 135         0xffe6000000000000UL,
 136         0x005e000000000000UL,
 137 };
 138
 139 unsigned long kvm_s390_fac_list_mask_size(void)
 140 {
 141         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
 142         return ARRAY_SIZE(kvm_s390_fac_list_mask);
 143 }
 144
 145 /* available cpu features supported by kvm */
 146 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 147 /* available subfunctions indicated via query / "test bit" */
 148 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 149
 150 static struct gmap_notifier gmap_notifier;
 151 static struct gmap_notifier vsie_gmap_notifier;
 152 debug_info_t *kvm_s390_dbf;
 153
 154 /* Section: not file related */
 155 int kvm_arch_hardware_enable(void)
 156 {
 157         /* every s390 is virtualization enabled ;-) */
 158         return 0;
 159 }
 160
 161 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 162                               unsigned long end);
 163
 164 /*
 165  * This callback is executed during stop_machine(). All CPUs are therefore
 166  * temporarily stopped. In order not to change guest behavior, we have to
 167  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 168  * so a CPU won't be stopped while calculating with the epoch.
 169  */
 170 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 171                           void *v)
 172 {
 173         struct kvm *kvm;
 174         struct kvm_vcpu *vcpu;
 175         int i;
 176         unsigned long long *delta = v;
 177
 178         list_for_each_entry(kvm, &vm_list, vm_list) {
 179                 kvm->arch.epoch -= *delta;
 180                 kvm_for_each_vcpu(i, vcpu, kvm) {
 181                         vcpu->arch.sie_block->epoch -= *delta;
 182                         if (vcpu->arch.cputm_enabled)
 183                                 vcpu->arch.cputm_start += *delta;
 184                         if (vcpu->arch.vsie_block)
 185                                 vcpu->arch.vsie_block->epoch -= *delta;
 186                 }
 187         }
 188         return NOTIFY_OK;
 189 }
 190
 191 static struct notifier_block kvm_clock_notifier = {
 192         .notifier_call = kvm_clock_sync,
 193 };
 194
 195 int kvm_arch_hardware_setup(void)
 196 {
 197         gmap_notifier.notifier_call = kvm_gmap_notifier;
 198         gmap_register_pte_notifier(&gmap_notifier);
 199         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 200         gmap_register_pte_notifier(&vsie_gmap_notifier);
 201         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 202                                        &kvm_clock_notifier);
 203         return 0;
 204 }
 205
 206 void kvm_arch_hardware_unsetup(void)
 207 {
 208         gmap_unregister_pte_notifier(&gmap_notifier);
 209         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 210         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 211                                          &kvm_clock_notifier);
 212 }
 213
 214 static void allow_cpu_feat(unsigned long nr)
 215 {
 216         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 217 }
 218
 219 static inline int plo_test_bit(unsigned char nr)
 220 {
 221         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
 222         int cc = 3; /* subfunction not available */
 223
 224         asm volatile(
 225                 /* Parameter registers are ignored for "test bit" */
 226                 "       plo     0,0,0,0(0)\n"
 227                 "       ipm     %0\n"
 228                 "       srl     %0,28\n"
 229                 : "=d" (cc)
 230                 : "d" (r0)
 231                 : "cc");
 232         return cc == 0;
 233 }
 234
 235 static void kvm_s390_cpu_feat_init(void)
 236 {
 237         int i;
 238
 239         for (i = 0; i < 256; ++i) {
 240                 if (plo_test_bit(i))
 241                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 242         }
 243
 244         if (test_facility(28)) /* TOD-clock steering */
 245                 etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF);
 246
 247         if (test_facility(17)) { /* MSA */
 248                 __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
 249                 __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
 250                 __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
 251                 __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
 252                 __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
 253         }
 254         if (test_facility(76)) /* MSA3 */
 255                 __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
 256         if (test_facility(77)) { /* MSA4 */
 257                 __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
 258                 __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
 259                 __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
 260                 __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
 261         }
 262         if (test_facility(57)) /* MSA5 */
 263                 __cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
 264
 265         if (MACHINE_HAS_ESOP)
 266                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 267         /*
 268          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 269          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 270          */
 271         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 272             !test_facility(3) || !nested)
 273                 return;
 274         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 275         if (sclp.has_64bscao)
 276                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 277         if (sclp.has_siif)
 278                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 279         if (sclp.has_gpere)
 280                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 281         if (sclp.has_gsls)
 282                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 283         if (sclp.has_ib)
 284                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 285         if (sclp.has_cei)
 286                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 287         if (sclp.has_ibs)
 288                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 289         /*
 290          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 291          * all skey handling functions read/set the skey from the PGSTE
 292          * instead of the real storage key.
 293          *
 294          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 295          * pages being detected as preserved although they are resident.
 296          *
 297          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 298          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 299          *
 300          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 301          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 302          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 303          *
 304          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 305          * cannot easily shadow the SCA because of the ipte lock.
 306          */
 307 }
 308
 309 int kvm_arch_init(void *opaque)
 310 {
 311         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 312         if (!kvm_s390_dbf)
 313                 return -ENOMEM;
 314
 315         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 316                 debug_unregister(kvm_s390_dbf);
 317                 return -ENOMEM;
 318         }
 319
 320         kvm_s390_cpu_feat_init();
 321
 322         /* Register floating interrupt controller interface. */
 323         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 324 }
 325
 326 void kvm_arch_exit(void)
 327 {
 328         debug_unregister(kvm_s390_dbf);
 329 }
 330
 331 /* Section: device related */
 332 long kvm_arch_dev_ioctl(struct file *filp,
 333                         unsigned int ioctl, unsigned long arg)
 334 {
 335         if (ioctl == KVM_S390_ENABLE_SIE)
 336                 return s390_enable_sie();
 337         return -EINVAL;
 338 }
 339
 340 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 341 {
 342         int r;
 343
 344         switch (ext) {
 345         case KVM_CAP_S390_PSW:
 346         case KVM_CAP_S390_GMAP:
 347         case KVM_CAP_SYNC_MMU:
 348 #ifdef CONFIG_KVM_S390_UCONTROL
 349         case KVM_CAP_S390_UCONTROL:
 350 #endif
 351         case KVM_CAP_ASYNC_PF:
 352         case KVM_CAP_SYNC_REGS:
 353         case KVM_CAP_ONE_REG:
 354         case KVM_CAP_ENABLE_CAP:
 355         case KVM_CAP_S390_CSS_SUPPORT:
 356         case KVM_CAP_IOEVENTFD:
 357         case KVM_CAP_DEVICE_CTRL:
 358         case KVM_CAP_ENABLE_CAP_VM:
 359         case KVM_CAP_S390_IRQCHIP:
 360         case KVM_CAP_VM_ATTRIBUTES:
 361         case KVM_CAP_MP_STATE:
 362         case KVM_CAP_S390_INJECT_IRQ:
 363         case KVM_CAP_S390_USER_SIGP:
 364         case KVM_CAP_S390_USER_STSI:
 365         case KVM_CAP_S390_SKEYS:
 366         case KVM_CAP_S390_IRQ_STATE:
 367                 r = 1;
 368                 break;
 369         case KVM_CAP_S390_MEM_OP:
 370                 r = MEM_OP_MAX_SIZE;
 371                 break;
 372         case KVM_CAP_NR_VCPUS:
 373         case KVM_CAP_MAX_VCPUS:
 374                 r = KVM_S390_BSCA_CPU_SLOTS;
 375                 if (sclp.has_esca && sclp.has_64bscao)
 376                         r = KVM_S390_ESCA_CPU_SLOTS;
 377                 break;
 378         case KVM_CAP_NR_MEMSLOTS:
 379                 r = KVM_USER_MEM_SLOTS;
 380                 break;
 381         case KVM_CAP_S390_COW:
 382                 r = MACHINE_HAS_ESOP;
 383                 break;
 384         case KVM_CAP_S390_VECTOR_REGISTERS:
 385                 r = MACHINE_HAS_VX;
 386                 break;
 387         case KVM_CAP_S390_RI:
 388                 r = test_facility(64);
 389                 break;
 390         default:
 391                 r = 0;
 392         }
 393         return r;
 394 }
 395
 396 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 397                                         struct kvm_memory_slot *memslot)
 398 {
 399         gfn_t cur_gfn, last_gfn;
 400         unsigned long address;
 401         struct gmap *gmap = kvm->arch.gmap;
 402
 403         /* Loop over all guest pages */
 404         last_gfn = memslot->base_gfn + memslot->npages;
 405         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 406                 address = gfn_to_hva_memslot(memslot, cur_gfn);
 407
 408                 if (test_and_clear_guest_dirty(gmap->mm, address))
 409                         mark_page_dirty(kvm, cur_gfn);
 410                 if (fatal_signal_pending(current))
 411                         return;
 412                 cond_resched();
 413         }
 414 }
 415
 416 /* Section: vm related */
 417 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 418
 419 /*
 420  * Get (and clear) the dirty memory log for a memory slot.
 421  */
 422 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 423                                struct kvm_dirty_log *log)
 424 {
 425         int r;
 426         unsigned long n;
 427         struct kvm_memslots *slots;
 428         struct kvm_memory_slot *memslot;
 429         int is_dirty = 0;
 430
 431         mutex_lock(&kvm->slots_lock);
 432
 433         r = -EINVAL;
 434         if (log->slot >= KVM_USER_MEM_SLOTS)
 435                 goto out;
 436
 437         slots = kvm_memslots(kvm);
 438         memslot = id_to_memslot(slots, log->slot);
 439         r = -ENOENT;
 440         if (!memslot->dirty_bitmap)
 441                 goto out;
 442
 443         kvm_s390_sync_dirty_log(kvm, memslot);
 444         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 445         if (r)
 446                 goto out;
 447
 448         /* Clear the dirty log */
 449         if (is_dirty) {
 450                 n = kvm_dirty_bitmap_bytes(memslot);
 451                 memset(memslot->dirty_bitmap, 0, n);
 452         }
 453         r = 0;
 454 out:
 455         mutex_unlock(&kvm->slots_lock);
 456         return r;
 457 }
 458
 459 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 460 {
 461         int r;
 462
 463         if (cap->flags)
 464                 return -EINVAL;
 465
 466         switch (cap->cap) {
 467         case KVM_CAP_S390_IRQCHIP:
 468                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 469                 kvm->arch.use_irqchip = 1;
 470                 r = 0;
 471                 break;
 472         case KVM_CAP_S390_USER_SIGP:
 473                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 474                 kvm->arch.user_sigp = 1;
 475                 r = 0;
 476                 break;
 477         case KVM_CAP_S390_VECTOR_REGISTERS:
 478                 mutex_lock(&kvm->lock);
 479                 if (kvm->created_vcpus) {
 480                         r = -EBUSY;
 481                 } else if (MACHINE_HAS_VX) {
 482                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 483                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 484                         r = 0;
 485                 } else
 486                         r = -EINVAL;
 487                 mutex_unlock(&kvm->lock);
 488                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 489                          r ? "(not available)" : "(success)");
 490                 break;
 491         case KVM_CAP_S390_RI:
 492                 r = -EINVAL;
 493                 mutex_lock(&kvm->lock);
 494                 if (kvm->created_vcpus) {
 495                         r = -EBUSY;
 496                 } else if (test_facility(64)) {
 497                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 498                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 499                         r = 0;
 500                 }
 501                 mutex_unlock(&kvm->lock);
 502                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 503                          r ? "(not available)" : "(success)");
 504                 break;
 505         case KVM_CAP_S390_USER_STSI:
 506                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 507                 kvm->arch.user_stsi = 1;
 508                 r = 0;
 509                 break;
 510         default:
 511                 r = -EINVAL;
 512                 break;
 513         }
 514         return r;
 515 }
 516
 517 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 518 {
 519         int ret;
 520
 521         switch (attr->attr) {
 522         case KVM_S390_VM_MEM_LIMIT_SIZE:
 523                 ret = 0;
 524                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 525                          kvm->arch.mem_limit);
 526                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 527                         ret = -EFAULT;
 528                 break;
 529         default:
 530                 ret = -ENXIO;
 531                 break;
 532         }
 533         return ret;
 534 }
 535
 536 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 537 {
 538         int ret;
 539         unsigned int idx;
 540         switch (attr->attr) {
 541         case KVM_S390_VM_MEM_ENABLE_CMMA:
 542                 ret = -ENXIO;
 543                 if (!sclp.has_cmma)
 544                         break;
 545
 546                 ret = -EBUSY;
 547                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 548                 mutex_lock(&kvm->lock);
 549                 if (!kvm->created_vcpus) {
 550                         kvm->arch.use_cmma = 1;
 551                         ret = 0;
 552                 }
 553                 mutex_unlock(&kvm->lock);
 554                 break;
 555         case KVM_S390_VM_MEM_CLR_CMMA:
 556                 ret = -ENXIO;
 557                 if (!sclp.has_cmma)
 558                         break;
 559                 ret = -EINVAL;
 560                 if (!kvm->arch.use_cmma)
 561                         break;
 562
 563                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 564                 mutex_lock(&kvm->lock);
 565                 idx = srcu_read_lock(&kvm->srcu);
 566                 s390_reset_cmma(kvm->arch.gmap->mm);
 567                 srcu_read_unlock(&kvm->srcu, idx);
 568                 mutex_unlock(&kvm->lock);
 569                 ret = 0;
 570                 break;
 571         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 572                 unsigned long new_limit;
 573
 574                 if (kvm_is_ucontrol(kvm))
 575                         return -EINVAL;
 576
 577                 if (get_user(new_limit, (u64 __user *)attr->addr))
 578                         return -EFAULT;
 579
 580                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 581                     new_limit > kvm->arch.mem_limit)
 582                         return -E2BIG;
 583
 584                 if (!new_limit)
 585                         return -EINVAL;
 586
 587                 /* gmap_create takes last usable address */
 588                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 589                         new_limit -= 1;
 590
 591                 ret = -EBUSY;
 592                 mutex_lock(&kvm->lock);
 593                 if (!kvm->created_vcpus) {
 594                         /* gmap_create will round the limit up */
 595                         struct gmap *new = gmap_create(current->mm, new_limit);
 596
 597                         if (!new) {
 598                                 ret = -ENOMEM;
 599                         } else {
 600                                 gmap_remove(kvm->arch.gmap);
 601                                 new->private = kvm;
 602                                 kvm->arch.gmap = new;
 603                                 ret = 0;
 604                         }
 605                 }
 606                 mutex_unlock(&kvm->lock);
 607                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 608                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 609                          (void *) kvm->arch.gmap->asce);
 610                 break;
 611         }
 612         default:
 613                 ret = -ENXIO;
 614                 break;
 615         }
 616         return ret;
 617 }
 618
 619 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 620
 621 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 622 {
 623         struct kvm_vcpu *vcpu;
 624         int i;
 625
 626         if (!test_kvm_facility(kvm, 76))
 627                 return -EINVAL;
 628
 629         mutex_lock(&kvm->lock);
 630         switch (attr->attr) {
 631         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 632                 get_random_bytes(
 633                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 634                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 635                 kvm->arch.crypto.aes_kw = 1;
 636                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 637                 break;
 638         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 639                 get_random_bytes(
 640                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 641                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 642                 kvm->arch.crypto.dea_kw = 1;
 643                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 644                 break;
 645         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 646                 kvm->arch.crypto.aes_kw = 0;
 647                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 648                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 649                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 650                 break;
 651         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 652                 kvm->arch.crypto.dea_kw = 0;
 653                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 654                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 655                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 656                 break;
 657         default:
 658                 mutex_unlock(&kvm->lock);
 659                 return -ENXIO;
 660         }
 661
 662         kvm_for_each_vcpu(i, vcpu, kvm) {
 663                 kvm_s390_vcpu_crypto_setup(vcpu);
 664                 exit_sie(vcpu);
 665         }
 666         mutex_unlock(&kvm->lock);
 667         return 0;
 668 }
 669
 670 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 671 {
 672         u8 gtod_high;
 673
 674         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 675                                            sizeof(gtod_high)))
 676                 return -EFAULT;
 677
 678         if (gtod_high != 0)
 679                 return -EINVAL;
 680         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 681
 682         return 0;
 683 }
 684
 685 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 686 {
 687         u64 gtod;
 688
 689         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 690                 return -EFAULT;
 691
 692         kvm_s390_set_tod_clock(kvm, gtod);
 693         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
 694         return 0;
 695 }
 696
 697 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 698 {
 699         int ret;
 700
 701         if (attr->flags)
 702                 return -EINVAL;
 703
 704         switch (attr->attr) {
 705         case KVM_S390_VM_TOD_HIGH:
 706                 ret = kvm_s390_set_tod_high(kvm, attr);
 707                 break;
 708         case KVM_S390_VM_TOD_LOW:
 709                 ret = kvm_s390_set_tod_low(kvm, attr);
 710                 break;
 711         default:
 712                 ret = -ENXIO;
 713                 break;
 714         }
 715         return ret;
 716 }
 717
 718 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 719 {
 720         u8 gtod_high = 0;
 721
 722         if (copy_to_user((void __user *)attr->addr, &gtod_high,
 723                                          sizeof(gtod_high)))
 724                 return -EFAULT;
 725         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
 726
 727         return 0;
 728 }
 729
 730 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 731 {
 732         u64 gtod;
 733
 734         gtod = kvm_s390_get_tod_clock_fast(kvm);
 735         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 736                 return -EFAULT;
 737         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
 738
 739         return 0;
 740 }
 741
 742 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 743 {
 744         int ret;
 745
 746         if (attr->flags)
 747                 return -EINVAL;
 748
 749         switch (attr->attr) {
 750         case KVM_S390_VM_TOD_HIGH:
 751                 ret = kvm_s390_get_tod_high(kvm, attr);
 752                 break;
 753         case KVM_S390_VM_TOD_LOW:
 754                 ret = kvm_s390_get_tod_low(kvm, attr);
 755                 break;
 756         default:
 757                 ret = -ENXIO;
 758                 break;
 759         }
 760         return ret;
 761 }
 762
 763 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 764 {
 765         struct kvm_s390_vm_cpu_processor *proc;
 766         u16 lowest_ibc, unblocked_ibc;
 767         int ret = 0;
 768
 769         mutex_lock(&kvm->lock);
 770         if (kvm->created_vcpus) {
 771                 ret = -EBUSY;
 772                 goto out;
 773         }
 774         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 775         if (!proc) {
 776                 ret = -ENOMEM;
 777                 goto out;
 778         }
 779         if (!copy_from_user(proc, (void __user *)attr->addr,
 780                             sizeof(*proc))) {
 781                 kvm->arch.model.cpuid = proc->cpuid;
 782                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
 783                 unblocked_ibc = sclp.ibc & 0xfff;
 784                 if (lowest_ibc) {
 785                         if (proc->ibc > unblocked_ibc)
 786                                 kvm->arch.model.ibc = unblocked_ibc;
 787                         else if (proc->ibc < lowest_ibc)
 788                                 kvm->arch.model.ibc = lowest_ibc;
 789                         else
 790                                 kvm->arch.model.ibc = proc->ibc;
 791                 }
 792                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
 793                        S390_ARCH_FAC_LIST_SIZE_BYTE);
 794         } else
 795                 ret = -EFAULT;
 796         kfree(proc);
 797 out:
 798         mutex_unlock(&kvm->lock);
 799         return ret;
 800 }
 801
 802 static int kvm_s390_set_processor_feat(struct kvm *kvm,
 803                                        struct kvm_device_attr *attr)
 804 {
 805         struct kvm_s390_vm_cpu_feat data;
 806         int ret = -EBUSY;
 807
 808         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
 809                 return -EFAULT;
 810         if (!bitmap_subset((unsigned long *) data.feat,
 811                            kvm_s390_available_cpu_feat,
 812                            KVM_S390_VM_CPU_FEAT_NR_BITS))
 813                 return -EINVAL;
 814
 815         mutex_lock(&kvm->lock);
 816         if (!atomic_read(&kvm->online_vcpus)) {
 817                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
 818                             KVM_S390_VM_CPU_FEAT_NR_BITS);
 819                 ret = 0;
 820         }
 821         mutex_unlock(&kvm->lock);
 822         return ret;
 823 }
 824
 825 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
 826                                           struct kvm_device_attr *attr)
 827 {
 828         /*
 829          * Once supported by kernel + hw, we have to store the subfunctions
 830          * in kvm->arch and remember that user space configured them.
 831          */
 832         return -ENXIO;
 833 }
 834
 835 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 836 {
 837         int ret = -ENXIO;
 838
 839         switch (attr->attr) {
 840         case KVM_S390_VM_CPU_PROCESSOR:
 841                 ret = kvm_s390_set_processor(kvm, attr);
 842                 break;
 843         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 844                 ret = kvm_s390_set_processor_feat(kvm, attr);
 845                 break;
 846         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 847                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
 848                 break;
 849         }
 850         return ret;
 851 }
 852
 853 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 854 {
 855         struct kvm_s390_vm_cpu_processor *proc;
 856         int ret = 0;
 857
 858         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 859         if (!proc) {
 860                 ret = -ENOMEM;
 861                 goto out;
 862         }
 863         proc->cpuid = kvm->arch.model.cpuid;
 864         proc->ibc = kvm->arch.model.ibc;
 865         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
 866                S390_ARCH_FAC_LIST_SIZE_BYTE);
 867         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
 868                 ret = -EFAULT;
 869         kfree(proc);
 870 out:
 871         return ret;
 872 }
 873
 874 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
 875 {
 876         struct kvm_s390_vm_cpu_machine *mach;
 877         int ret = 0;
 878
 879         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
 880         if (!mach) {
 881                 ret = -ENOMEM;
 882                 goto out;
 883         }
 884         get_cpu_id((struct cpuid *) &mach->cpuid);
 885         mach->ibc = sclp.ibc;
 886         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
 887                S390_ARCH_FAC_LIST_SIZE_BYTE);
 888         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
 889                S390_ARCH_FAC_LIST_SIZE_BYTE);
 890         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
 891                 ret = -EFAULT;
 892         kfree(mach);
 893 out:
 894         return ret;
 895 }
 896
 897 static int kvm_s390_get_processor_feat(struct kvm *kvm,
 898                                        struct kvm_device_attr *attr)
 899 {
 900         struct kvm_s390_vm_cpu_feat data;
 901
 902         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
 903                     KVM_S390_VM_CPU_FEAT_NR_BITS);
 904         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
 905                 return -EFAULT;
 906         return 0;
 907 }
 908
 909 static int kvm_s390_get_machine_feat(struct kvm *kvm,
 910                                      struct kvm_device_attr *attr)
 911 {
 912         struct kvm_s390_vm_cpu_feat data;
 913
 914         bitmap_copy((unsigned long *) data.feat,
 915                     kvm_s390_available_cpu_feat,
 916                     KVM_S390_VM_CPU_FEAT_NR_BITS);
 917         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
 918                 return -EFAULT;
 919         return 0;
 920 }
 921
 922 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
 923                                           struct kvm_device_attr *attr)
 924 {
 925         /*
 926          * Once we can actually configure subfunctions (kernel + hw support),
 927          * we have to check if they were already set by user space, if so copy
 928          * them from kvm->arch.
 929          */
 930         return -ENXIO;
 931 }
 932
 933 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
 934                                         struct kvm_device_attr *attr)
 935 {
 936         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
 937             sizeof(struct kvm_s390_vm_cpu_subfunc)))
 938                 return -EFAULT;
 939         return 0;
 940 }
 941 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 942 {
 943         int ret = -ENXIO;
 944
 945         switch (attr->attr) {
 946         case KVM_S390_VM_CPU_PROCESSOR:
 947                 ret = kvm_s390_get_processor(kvm, attr);
 948                 break;
 949         case KVM_S390_VM_CPU_MACHINE:
 950                 ret = kvm_s390_get_machine(kvm, attr);
 951                 break;
 952         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 953                 ret = kvm_s390_get_processor_feat(kvm, attr);
 954                 break;
 955         case KVM_S390_VM_CPU_MACHINE_FEAT:
 956                 ret = kvm_s390_get_machine_feat(kvm, attr);
 957                 break;
 958         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 959                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
 960                 break;
 961         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
 962                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
 963                 break;
 964         }
 965         return ret;
 966 }
 967
 968 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 969 {
 970         int ret;
 971
 972         switch (attr->group) {
 973         case KVM_S390_VM_MEM_CTRL:
 974                 ret = kvm_s390_set_mem_control(kvm, attr);
 975                 break;
 976         case KVM_S390_VM_TOD:
 977                 ret = kvm_s390_set_tod(kvm, attr);
 978                 break;
 979         case KVM_S390_VM_CPU_MODEL:
 980                 ret = kvm_s390_set_cpu_model(kvm, attr);
 981                 break;
 982         case KVM_S390_VM_CRYPTO:
 983                 ret = kvm_s390_vm_set_crypto(kvm, attr);
 984                 break;
 985         default:
 986                 ret = -ENXIO;
 987                 break;
 988         }
 989
 990         return ret;
 991 }
 992
 993 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 994 {
 995         int ret;
 996
 997         switch (attr->group) {
 998         case KVM_S390_VM_MEM_CTRL:
 999                 ret = kvm_s390_get_mem_control(kvm, attr);
1000                 break;
1001         case KVM_S390_VM_TOD:
1002                 ret = kvm_s390_get_tod(kvm, attr);
1003                 break;
1004         case KVM_S390_VM_CPU_MODEL:
1005                 ret = kvm_s390_get_cpu_model(kvm, attr);
1006                 break;
1007         default:
1008                 ret = -ENXIO;
1009                 break;
1010         }
1011
1012         return ret;
1013 }
1014
1015 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1016 {
1017         int ret;
1018
1019         switch (attr->group) {
1020         case KVM_S390_VM_MEM_CTRL:
1021                 switch (attr->attr) {
1022                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1023                 case KVM_S390_VM_MEM_CLR_CMMA:
1024                         ret = sclp.has_cmma ? 0 : -ENXIO;
1025                         break;
1026                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1027                         ret = 0;
1028                         break;
1029                 default:
1030                         ret = -ENXIO;
1031                         break;
1032                 }
1033                 break;
1034         case KVM_S390_VM_TOD:
1035                 switch (attr->attr) {
1036                 case KVM_S390_VM_TOD_LOW:
1037                 case KVM_S390_VM_TOD_HIGH:
1038                         ret = 0;
1039                         break;
1040                 default:
1041                         ret = -ENXIO;
1042                         break;
1043                 }
1044                 break;
1045         case KVM_S390_VM_CPU_MODEL:
1046                 switch (attr->attr) {
1047                 case KVM_S390_VM_CPU_PROCESSOR:
1048                 case KVM_S390_VM_CPU_MACHINE:
1049                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1050                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1051                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1052                         ret = 0;
1053                         break;
1054                 /* configuring subfunctions is not supported yet */
1055                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1056                 default:
1057                         ret = -ENXIO;
1058                         break;
1059                 }
1060                 break;
1061         case KVM_S390_VM_CRYPTO:
1062                 switch (attr->attr) {
1063                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1064                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1065                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1066                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1067                         ret = 0;
1068                         break;
1069                 default:
1070                         ret = -ENXIO;
1071                         break;
1072                 }
1073                 break;
1074         default:
1075                 ret = -ENXIO;
1076                 break;
1077         }
1078
1079         return ret;
1080 }
1081
1082 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1083 {
1084         uint8_t *keys;
1085         uint64_t hva;
1086         int i, r = 0;
1087
1088         if (args->flags != 0)
1089                 return -EINVAL;
1090
1091         /* Is this guest using storage keys? */
1092         if (!mm_use_skey(current->mm))
1093                 return KVM_S390_GET_SKEYS_NONE;
1094
1095         /* Enforce sane limit on memory allocation */
1096         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1097                 return -EINVAL;
1098
1099         keys = kmalloc_array(args->count, sizeof(uint8_t),
1100                              GFP_KERNEL | __GFP_NOWARN);
1101         if (!keys)
1102                 keys = vmalloc(sizeof(uint8_t) * args->count);
1103         if (!keys)
1104                 return -ENOMEM;
1105
1106         down_read(&current->mm->mmap_sem);
1107         for (i = 0; i < args->count; i++) {
1108                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1109                 if (kvm_is_error_hva(hva)) {
1110                         r = -EFAULT;
1111                         break;
1112                 }
1113
1114                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1115                 if (r)
1116                         break;
1117         }
1118         up_read(&current->mm->mmap_sem);
1119
1120         if (!r) {
1121                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1122                                  sizeof(uint8_t) * args->count);
1123                 if (r)
1124                         r = -EFAULT;
1125         }
1126
1127         kvfree(keys);
1128         return r;
1129 }
1130
1131 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1132 {
1133         uint8_t *keys;
1134         uint64_t hva;
1135         int i, r = 0;
1136
1137         if (args->flags != 0)
1138                 return -EINVAL;
1139
1140         /* Enforce sane limit on memory allocation */
1141         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1142                 return -EINVAL;
1143
1144         keys = kmalloc_array(args->count, sizeof(uint8_t),
1145                              GFP_KERNEL | __GFP_NOWARN);
1146         if (!keys)
1147                 keys = vmalloc(sizeof(uint8_t) * args->count);
1148         if (!keys)
1149                 return -ENOMEM;
1150
1151         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1152                            sizeof(uint8_t) * args->count);
1153         if (r) {
1154                 r = -EFAULT;
1155                 goto out;
1156         }
1157
1158         /* Enable storage key handling for the guest */
1159         r = s390_enable_skey();
1160         if (r)
1161                 goto out;
1162
1163         down_read(&current->mm->mmap_sem);
1164         for (i = 0; i < args->count; i++) {
1165                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1166                 if (kvm_is_error_hva(hva)) {
1167                         r = -EFAULT;
1168                         break;
1169                 }
1170
1171                 /* Lowest order bit is reserved */
1172                 if (keys[i] & 0x01) {
1173                         r = -EINVAL;
1174                         break;
1175                 }
1176
1177                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1178                 if (r)
1179                         break;
1180         }
1181         up_read(&current->mm->mmap_sem);
1182 out:
1183         kvfree(keys);
1184         return r;
1185 }
1186
1187 long kvm_arch_vm_ioctl(struct file *filp,
1188                        unsigned int ioctl, unsigned long arg)
1189 {
1190         struct kvm *kvm = filp->private_data;
1191         void __user *argp = (void __user *)arg;
1192         struct kvm_device_attr attr;
1193         int r;
1194
1195         switch (ioctl) {
1196         case KVM_S390_INTERRUPT: {
1197                 struct kvm_s390_interrupt s390int;
1198
1199                 r = -EFAULT;
1200                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1201                         break;
1202                 r = kvm_s390_inject_vm(kvm, &s390int);
1203                 break;
1204         }
1205         case KVM_ENABLE_CAP: {
1206                 struct kvm_enable_cap cap;
1207                 r = -EFAULT;
1208                 if (copy_from_user(&cap, argp, sizeof(cap)))
1209                         break;
1210                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1211                 break;
1212         }
1213         case KVM_CREATE_IRQCHIP: {
1214                 struct kvm_irq_routing_entry routing;
1215
1216                 r = -EINVAL;
1217                 if (kvm->arch.use_irqchip) {
1218                         /* Set up dummy routing. */
1219                         memset(&routing, 0, sizeof(routing));
1220                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1221                 }
1222                 break;
1223         }
1224         case KVM_SET_DEVICE_ATTR: {
1225                 r = -EFAULT;
1226                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1227                         break;
1228                 r = kvm_s390_vm_set_attr(kvm, &attr);
1229                 break;
1230         }
1231         case KVM_GET_DEVICE_ATTR: {
1232                 r = -EFAULT;
1233                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1234                         break;
1235                 r = kvm_s390_vm_get_attr(kvm, &attr);
1236                 break;
1237         }
1238         case KVM_HAS_DEVICE_ATTR: {
1239                 r = -EFAULT;
1240                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1241                         break;
1242                 r = kvm_s390_vm_has_attr(kvm, &attr);
1243                 break;
1244         }
1245         case KVM_S390_GET_SKEYS: {
1246                 struct kvm_s390_skeys args;
1247
1248                 r = -EFAULT;
1249                 if (copy_from_user(&args, argp,
1250                                    sizeof(struct kvm_s390_skeys)))
1251                         break;
1252                 r = kvm_s390_get_skeys(kvm, &args);
1253                 break;
1254         }
1255         case KVM_S390_SET_SKEYS: {
1256                 struct kvm_s390_skeys args;
1257
1258                 r = -EFAULT;
1259                 if (copy_from_user(&args, argp,
1260                                    sizeof(struct kvm_s390_skeys)))
1261                         break;
1262                 r = kvm_s390_set_skeys(kvm, &args);
1263                 break;
1264         }
1265         default:
1266                 r = -ENOTTY;
1267         }
1268
1269         return r;
1270 }
1271
1272 static int kvm_s390_query_ap_config(u8 *config)
1273 {
1274         u32 fcn_code = 0x04000000UL;
1275         u32 cc = 0;
1276
1277         memset(config, 0, 128);
1278         asm volatile(
1279                 "lgr 0,%1\n"
1280                 "lgr 2,%2\n"
1281                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1282                 "0: ipm %0\n"
1283                 "srl %0,28\n"
1284                 "1:\n"
1285                 EX_TABLE(0b, 1b)
1286                 : "+r" (cc)
1287                 : "r" (fcn_code), "r" (config)
1288                 : "cc", "0", "2", "memory"
1289         );
1290
1291         return cc;
1292 }
1293
1294 static int kvm_s390_apxa_installed(void)
1295 {
1296         u8 config[128];
1297         int cc;
1298
1299         if (test_facility(12)) {
1300                 cc = kvm_s390_query_ap_config(config);
1301
1302                 if (cc)
1303                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1304                 else
1305                         return config[0] & 0x40;
1306         }
1307
1308         return 0;
1309 }
1310
1311 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1312 {
1313         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1314
1315         if (kvm_s390_apxa_installed())
1316                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1317         else
1318                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1319 }
1320
1321 static u64 kvm_s390_get_initial_cpuid(void)
1322 {
1323         struct cpuid cpuid;
1324
1325         get_cpu_id(&cpuid);
1326         cpuid.version = 0xff;
1327         return *((u64 *) &cpuid);
1328 }
1329
1330 static void kvm_s390_crypto_init(struct kvm *kvm)
1331 {
1332         if (!test_kvm_facility(kvm, 76))
1333                 return;
1334
1335         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1336         kvm_s390_set_crycb_format(kvm);
1337
1338         /* Enable AES/DEA protected key functions by default */
1339         kvm->arch.crypto.aes_kw = 1;
1340         kvm->arch.crypto.dea_kw = 1;
1341         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1342                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1343         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1344                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1345 }
1346
1347 static void sca_dispose(struct kvm *kvm)
1348 {
1349         if (kvm->arch.use_esca)
1350                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1351         else
1352                 free_page((unsigned long)(kvm->arch.sca));
1353         kvm->arch.sca = NULL;
1354 }
1355
1356 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1357 {
1358         gfp_t alloc_flags = GFP_KERNEL;
1359         int i, rc;
1360         char debug_name[16];
1361         static unsigned long sca_offset;
1362
1363         rc = -EINVAL;
1364 #ifdef CONFIG_KVM_S390_UCONTROL
1365         if (type & ~KVM_VM_S390_UCONTROL)
1366                 goto out_err;
1367         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1368                 goto out_err;
1369 #else
1370         if (type)
1371                 goto out_err;
1372 #endif
1373
1374         rc = s390_enable_sie();
1375         if (rc)
1376                 goto out_err;
1377
1378         rc = -ENOMEM;
1379
1380         ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1381
1382         kvm->arch.use_esca = 0; /* start with basic SCA */
1383         if (!sclp.has_64bscao)
1384                 alloc_flags |= GFP_DMA;
1385         rwlock_init(&kvm->arch.sca_lock);
1386         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1387         if (!kvm->arch.sca)
1388                 goto out_err;
1389         spin_lock(&kvm_lock);
1390         sca_offset += 16;
1391         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1392                 sca_offset = 0;
1393         kvm->arch.sca = (struct bsca_block *)
1394                         ((char *) kvm->arch.sca + sca_offset);
1395         spin_unlock(&kvm_lock);
1396
1397         sprintf(debug_name, "kvm-%u", current->pid);
1398
1399         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1400         if (!kvm->arch.dbf)
1401                 goto out_err;
1402
1403         kvm->arch.sie_page2 =
1404              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1405         if (!kvm->arch.sie_page2)
1406                 goto out_err;
1407
1408         /* Populate the facility mask initially. */
1409         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1410                S390_ARCH_FAC_LIST_SIZE_BYTE);
1411         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1412                 if (i < kvm_s390_fac_list_mask_size())
1413                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1414                 else
1415                         kvm->arch.model.fac_mask[i] = 0UL;
1416         }
1417
1418         /* Populate the facility list initially. */
1419         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1420         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1421                S390_ARCH_FAC_LIST_SIZE_BYTE);
1422
1423         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1424         set_kvm_facility(kvm->arch.model.fac_list, 74);
1425
1426         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1427         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1428
1429         kvm_s390_crypto_init(kvm);
1430
1431         spin_lock_init(&kvm->arch.float_int.lock);
1432         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1433                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1434         init_waitqueue_head(&kvm->arch.ipte_wq);
1435         mutex_init(&kvm->arch.ipte_mutex);
1436
1437         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1438         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1439
1440         if (type & KVM_VM_S390_UCONTROL) {
1441                 kvm->arch.gmap = NULL;
1442                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1443         } else {
1444                 if (sclp.hamax == U64_MAX)
1445                         kvm->arch.mem_limit = TASK_MAX_SIZE;
1446                 else
1447                         kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1448                                                     sclp.hamax + 1);
1449                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1450                 if (!kvm->arch.gmap)
1451                         goto out_err;
1452                 kvm->arch.gmap->private = kvm;
1453                 kvm->arch.gmap->pfault_enabled = 0;
1454         }
1455
1456         kvm->arch.css_support = 0;
1457         kvm->arch.use_irqchip = 0;
1458         kvm->arch.epoch = 0;
1459
1460         spin_lock_init(&kvm->arch.start_stop_lock);
1461         kvm_s390_vsie_init(kvm);
1462         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1463
1464         return 0;
1465 out_err:
1466         free_page((unsigned long)kvm->arch.sie_page2);
1467         debug_unregister(kvm->arch.dbf);
1468         sca_dispose(kvm);
1469         KVM_EVENT(3, "creation of vm failed: %d", rc);
1470         return rc;
1471 }
1472
1473 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1474 {
1475         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1476         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1477         kvm_s390_clear_local_irqs(vcpu);
1478         kvm_clear_async_pf_completion_queue(vcpu);
1479         if (!kvm_is_ucontrol(vcpu->kvm))
1480                 sca_del_vcpu(vcpu);
1481
1482         if (kvm_is_ucontrol(vcpu->kvm))
1483                 gmap_remove(vcpu->arch.gmap);
1484
1485         if (vcpu->kvm->arch.use_cmma)
1486                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1487         free_page((unsigned long)(vcpu->arch.sie_block));
1488
1489         kvm_vcpu_uninit(vcpu);
1490         kmem_cache_free(kvm_vcpu_cache, vcpu);
1491 }
1492
1493 static void kvm_free_vcpus(struct kvm *kvm)
1494 {
1495         unsigned int i;
1496         struct kvm_vcpu *vcpu;
1497
1498         kvm_for_each_vcpu(i, vcpu, kvm)
1499                 kvm_arch_vcpu_destroy(vcpu);
1500
1501         mutex_lock(&kvm->lock);
1502         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1503                 kvm->vcpus[i] = NULL;
1504
1505         atomic_set(&kvm->online_vcpus, 0);
1506         mutex_unlock(&kvm->lock);
1507 }
1508
1509 void kvm_arch_destroy_vm(struct kvm *kvm)
1510 {
1511         kvm_free_vcpus(kvm);
1512         sca_dispose(kvm);
1513         debug_unregister(kvm->arch.dbf);
1514         free_page((unsigned long)kvm->arch.sie_page2);
1515         if (!kvm_is_ucontrol(kvm))
1516                 gmap_remove(kvm->arch.gmap);
1517         kvm_s390_destroy_adapters(kvm);
1518         kvm_s390_clear_float_irqs(kvm);
1519         kvm_s390_vsie_destroy(kvm);
1520         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1521 }
1522
1523 /* Section: vcpu related */
1524 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1525 {
1526         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1527         if (!vcpu->arch.gmap)
1528                 return -ENOMEM;
1529         vcpu->arch.gmap->private = vcpu->kvm;
1530
1531         return 0;
1532 }
1533
1534 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1535 {
1536         read_lock(&vcpu->kvm->arch.sca_lock);
1537         if (vcpu->kvm->arch.use_esca) {
1538                 struct esca_block *sca = vcpu->kvm->arch.sca;
1539
1540                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1541                 sca->cpu[vcpu->vcpu_id].sda = 0;
1542         } else {
1543                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1544
1545                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1546                 sca->cpu[vcpu->vcpu_id].sda = 0;
1547         }
1548         read_unlock(&vcpu->kvm->arch.sca_lock);
1549 }
1550
1551 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1552 {
1553         read_lock(&vcpu->kvm->arch.sca_lock);
1554         if (vcpu->kvm->arch.use_esca) {
1555                 struct esca_block *sca = vcpu->kvm->arch.sca;
1556
1557                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1558                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1559                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1560                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1561                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1562         } else {
1563                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1564
1565                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1566                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1567                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1568                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1569         }
1570         read_unlock(&vcpu->kvm->arch.sca_lock);
1571 }
1572
1573 /* Basic SCA to Extended SCA data copy routines */
1574 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1575 {
1576         d->sda = s->sda;
1577         d->sigp_ctrl.c = s->sigp_ctrl.c;
1578         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1579 }
1580
1581 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1582 {
1583         int i;
1584
1585         d->ipte_control = s->ipte_control;
1586         d->mcn[0] = s->mcn;
1587         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1588                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1589 }
1590
1591 static int sca_switch_to_extended(struct kvm *kvm)
1592 {
1593         struct bsca_block *old_sca = kvm->arch.sca;
1594         struct esca_block *new_sca;
1595         struct kvm_vcpu *vcpu;
1596         unsigned int vcpu_idx;
1597         u32 scaol, scaoh;
1598
1599         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1600         if (!new_sca)
1601                 return -ENOMEM;
1602
1603         scaoh = (u32)((u64)(new_sca) >> 32);
1604         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1605
1606         kvm_s390_vcpu_block_all(kvm);
1607         write_lock(&kvm->arch.sca_lock);
1608
1609         sca_copy_b_to_e(new_sca, old_sca);
1610
1611         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1612                 vcpu->arch.sie_block->scaoh = scaoh;
1613                 vcpu->arch.sie_block->scaol = scaol;
1614                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1615         }
1616         kvm->arch.sca = new_sca;
1617         kvm->arch.use_esca = 1;
1618
1619         write_unlock(&kvm->arch.sca_lock);
1620         kvm_s390_vcpu_unblock_all(kvm);
1621
1622         free_page((unsigned long)old_sca);
1623
1624         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1625                  old_sca, kvm->arch.sca);
1626         return 0;
1627 }
1628
1629 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1630 {
1631         int rc;
1632
1633         if (id < KVM_S390_BSCA_CPU_SLOTS)
1634                 return true;
1635         if (!sclp.has_esca || !sclp.has_64bscao)
1636                 return false;
1637
1638         mutex_lock(&kvm->lock);
1639         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1640         mutex_unlock(&kvm->lock);
1641
1642         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1643 }
1644
1645 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1646 {
1647         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1648         kvm_clear_async_pf_completion_queue(vcpu);
1649         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1650                                     KVM_SYNC_GPRS |
1651                                     KVM_SYNC_ACRS |
1652                                     KVM_SYNC_CRS |
1653                                     KVM_SYNC_ARCH0 |
1654                                     KVM_SYNC_PFAULT;
1655         if (test_kvm_facility(vcpu->kvm, 64))
1656                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1657         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1658          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1659          */
1660         if (MACHINE_HAS_VX)
1661                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1662         else
1663                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1664
1665         if (kvm_is_ucontrol(vcpu->kvm))
1666                 return __kvm_ucontrol_vcpu_init(vcpu);
1667
1668         return 0;
1669 }
1670
1671 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1672 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1673 {
1674         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1675         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1676         vcpu->arch.cputm_start = get_tod_clock_fast();
1677         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1678 }
1679
1680 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1681 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1682 {
1683         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1684         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1685         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1686         vcpu->arch.cputm_start = 0;
1687         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1688 }
1689
1690 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1691 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1692 {
1693         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1694         vcpu->arch.cputm_enabled = true;
1695         __start_cpu_timer_accounting(vcpu);
1696 }
1697
1698 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1699 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1700 {
1701         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1702         __stop_cpu_timer_accounting(vcpu);
1703         vcpu->arch.cputm_enabled = false;
1704 }
1705
1706 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1707 {
1708         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1709         __enable_cpu_timer_accounting(vcpu);
1710         preempt_enable();
1711 }
1712
1713 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1714 {
1715         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1716         __disable_cpu_timer_accounting(vcpu);
1717         preempt_enable();
1718 }
1719
1720 /* set the cpu timer - may only be called from the VCPU thread itself */
1721 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1722 {
1723         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1724         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1725         if (vcpu->arch.cputm_enabled)
1726                 vcpu->arch.cputm_start = get_tod_clock_fast();
1727         vcpu->arch.sie_block->cputm = cputm;
1728         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1729         preempt_enable();
1730 }
1731
1732 /* update and get the cpu timer - can also be called from other VCPU threads */
1733 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1734 {
1735         unsigned int seq;
1736         __u64 value;
1737
1738         if (unlikely(!vcpu->arch.cputm_enabled))
1739                 return vcpu->arch.sie_block->cputm;
1740
1741         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1742         do {
1743                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1744                 /*
1745                  * If the writer would ever execute a read in the critical
1746                  * section, e.g. in irq context, we have a deadlock.
1747                  */
1748                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1749                 value = vcpu->arch.sie_block->cputm;
1750                 /* if cputm_start is 0, accounting is being started/stopped */
1751                 if (likely(vcpu->arch.cputm_start))
1752                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1753         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1754         preempt_enable();
1755         return value;
1756 }
1757
1758 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1759 {
1760         /* Save host register state */
1761         save_fpu_regs();
1762         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1763         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1764
1765         if (MACHINE_HAS_VX)
1766                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1767         else
1768                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1769         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1770         if (test_fp_ctl(current->thread.fpu.fpc))
1771                 /* User space provided an invalid FPC, let's clear it */
1772                 current->thread.fpu.fpc = 0;
1773
1774         save_access_regs(vcpu->arch.host_acrs);
1775         restore_access_regs(vcpu->run->s.regs.acrs);
1776         gmap_enable(vcpu->arch.enabled_gmap);
1777         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1778         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1779                 __start_cpu_timer_accounting(vcpu);
1780         vcpu->cpu = cpu;
1781 }
1782
1783 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1784 {
1785         vcpu->cpu = -1;
1786         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1787                 __stop_cpu_timer_accounting(vcpu);
1788         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1789         vcpu->arch.enabled_gmap = gmap_get_enabled();
1790         gmap_disable(vcpu->arch.enabled_gmap);
1791
1792         /* Save guest register state */
1793         save_fpu_regs();
1794         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1795
1796         /* Restore host register state */
1797         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1798         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1799
1800         save_access_regs(vcpu->run->s.regs.acrs);
1801         restore_access_regs(vcpu->arch.host_acrs);
1802 }
1803
1804 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1805 {
1806         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1807         vcpu->arch.sie_block->gpsw.mask = 0UL;
1808         vcpu->arch.sie_block->gpsw.addr = 0UL;
1809         kvm_s390_set_prefix(vcpu, 0);
1810         kvm_s390_set_cpu_timer(vcpu, 0);
1811         vcpu->arch.sie_block->ckc       = 0UL;
1812         vcpu->arch.sie_block->todpr     = 0;
1813         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1814         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1815         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1816         /* make sure the new fpc will be lazily loaded */
1817         save_fpu_regs();
1818         current->thread.fpu.fpc = 0;
1819         vcpu->arch.sie_block->gbea = 1;
1820         vcpu->arch.sie_block->pp = 0;
1821         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1822         kvm_clear_async_pf_completion_queue(vcpu);
1823         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1824                 kvm_s390_vcpu_stop(vcpu);
1825         kvm_s390_clear_local_irqs(vcpu);
1826 }
1827
1828 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1829 {
1830         mutex_lock(&vcpu->kvm->lock);
1831         preempt_disable();
1832         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1833         preempt_enable();
1834         mutex_unlock(&vcpu->kvm->lock);
1835         if (!kvm_is_ucontrol(vcpu->kvm)) {
1836                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1837                 sca_add_vcpu(vcpu);
1838         }
1839         /* make vcpu_load load the right gmap on the first trigger */
1840         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1841 }
1842
1843 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1844 {
1845         if (!test_kvm_facility(vcpu->kvm, 76))
1846                 return;
1847
1848         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1849
1850         if (vcpu->kvm->arch.crypto.aes_kw)
1851                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1852         if (vcpu->kvm->arch.crypto.dea_kw)
1853                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1854
1855         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1856 }
1857
1858 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1859 {
1860         free_page(vcpu->arch.sie_block->cbrlo);
1861         vcpu->arch.sie_block->cbrlo = 0;
1862 }
1863
1864 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1865 {
1866         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1867         if (!vcpu->arch.sie_block->cbrlo)
1868                 return -ENOMEM;
1869
1870         vcpu->arch.sie_block->ecb2 |= 0x80;
1871         vcpu->arch.sie_block->ecb2 &= ~0x08;
1872         return 0;
1873 }
1874
1875 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1876 {
1877         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1878
1879         vcpu->arch.sie_block->ibc = model->ibc;
1880         if (test_kvm_facility(vcpu->kvm, 7))
1881                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1882 }
1883
1884 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1885 {
1886         int rc = 0;
1887
1888         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1889                                                     CPUSTAT_SM |
1890                                                     CPUSTAT_STOPPED);
1891
1892         if (test_kvm_facility(vcpu->kvm, 78))
1893                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1894         else if (test_kvm_facility(vcpu->kvm, 8))
1895                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1896
1897         kvm_s390_vcpu_setup_model(vcpu);
1898
1899         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1900         if (MACHINE_HAS_ESOP)
1901                 vcpu->arch.sie_block->ecb |= 0x02;
1902         if (test_kvm_facility(vcpu->kvm, 9))
1903                 vcpu->arch.sie_block->ecb |= 0x04;
1904         if (test_kvm_facility(vcpu->kvm, 73))
1905                 vcpu->arch.sie_block->ecb |= 0x10;
1906
1907         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1908                 vcpu->arch.sie_block->ecb2 |= 0x08;
1909         vcpu->arch.sie_block->eca = 0x1002000U;
1910         if (sclp.has_cei)
1911                 vcpu->arch.sie_block->eca |= 0x80000000U;
1912         if (sclp.has_ib)
1913                 vcpu->arch.sie_block->eca |= 0x40000000U;
1914         if (sclp.has_siif)
1915                 vcpu->arch.sie_block->eca |= 1;
1916         if (sclp.has_sigpif)
1917                 vcpu->arch.sie_block->eca |= 0x10000000U;
1918         if (test_kvm_facility(vcpu->kvm, 64))
1919                 vcpu->arch.sie_block->ecb3 |= 0x01;
1920         if (test_kvm_facility(vcpu->kvm, 129)) {
1921                 vcpu->arch.sie_block->eca |= 0x00020000;
1922                 vcpu->arch.sie_block->ecd |= 0x20000000;
1923         }
1924         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1925         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1926         if (test_kvm_facility(vcpu->kvm, 74))
1927                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1928
1929         if (vcpu->kvm->arch.use_cmma) {
1930                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1931                 if (rc)
1932                         return rc;
1933         }
1934         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1935         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1936
1937         kvm_s390_vcpu_crypto_setup(vcpu);
1938
1939         return rc;
1940 }
1941
1942 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1943                                       unsigned int id)
1944 {
1945         struct kvm_vcpu *vcpu;
1946         struct sie_page *sie_page;
1947         int rc = -EINVAL;
1948
1949         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1950                 goto out;
1951
1952         rc = -ENOMEM;
1953
1954         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1955         if (!vcpu)
1956                 goto out;
1957
1958         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1959         if (!sie_page)
1960                 goto out_free_cpu;
1961
1962         vcpu->arch.sie_block = &sie_page->sie_block;
1963         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1964
1965         /* the real guest size will always be smaller than msl */
1966         vcpu->arch.sie_block->mso = 0;
1967         vcpu->arch.sie_block->msl = sclp.hamax;
1968
1969         vcpu->arch.sie_block->icpua = id;
1970         spin_lock_init(&vcpu->arch.local_int.lock);
1971         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1972         vcpu->arch.local_int.wq = &vcpu->wq;
1973         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1974         seqcount_init(&vcpu->arch.cputm_seqcount);
1975
1976         rc = kvm_vcpu_init(vcpu, kvm, id);
1977         if (rc)
1978                 goto out_free_sie_block;
1979         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1980                  vcpu->arch.sie_block);
1981         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1982
1983         return vcpu;
1984 out_free_sie_block:
1985         free_page((unsigned long)(vcpu->arch.sie_block));
1986 out_free_cpu:
1987         kmem_cache_free(kvm_vcpu_cache, vcpu);
1988 out:
1989         return ERR_PTR(rc);
1990 }
1991
1992 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1993 {
1994         return kvm_s390_vcpu_has_irq(vcpu, 0);
1995 }
1996
1997 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1998 {
1999         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2000         exit_sie(vcpu);
2001 }
2002
2003 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2004 {
2005         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2006 }
2007
2008 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2009 {
2010         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2011         exit_sie(vcpu);
2012 }
2013
2014 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2015 {
2016         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2017 }
2018
2019 /*
2020  * Kick a guest cpu out of SIE and wait until SIE is not running.
2021  * If the CPU is not running (e.g. waiting as idle) the function will
2022  * return immediately. */
2023 void exit_sie(struct kvm_vcpu *vcpu)
2024 {
2025         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2026         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2027                 cpu_relax();
2028 }
2029
2030 /* Kick a guest cpu out of SIE to process a request synchronously */
2031 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2032 {
2033         kvm_make_request(req, vcpu);
2034         kvm_s390_vcpu_request(vcpu);
2035 }
2036
2037 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2038                               unsigned long end)
2039 {
2040         struct kvm *kvm = gmap->private;
2041         struct kvm_vcpu *vcpu;
2042         unsigned long prefix;
2043         int i;
2044
2045         if (gmap_is_shadow(gmap))
2046                 return;
2047         if (start >= 1UL << 31)
2048                 /* We are only interested in prefix pages */
2049                 return;
2050         kvm_for_each_vcpu(i, vcpu, kvm) {
2051                 /* match against both prefix pages */
2052                 prefix = kvm_s390_get_prefix(vcpu);
2053                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2054                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2055                                    start, end);
2056                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2057                 }
2058         }
2059 }
2060
2061 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2062 {
2063         /* kvm common code refers to this, but never calls it */
2064         BUG();
2065         return 0;
2066 }
2067
2068 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2069                                            struct kvm_one_reg *reg)
2070 {
2071         int r = -EINVAL;
2072
2073         switch (reg->id) {
2074         case KVM_REG_S390_TODPR:
2075                 r = put_user(vcpu->arch.sie_block->todpr,
2076                              (u32 __user *)reg->addr);
2077                 break;
2078         case KVM_REG_S390_EPOCHDIFF:
2079                 r = put_user(vcpu->arch.sie_block->epoch,
2080                              (u64 __user *)reg->addr);
2081                 break;
2082         case KVM_REG_S390_CPU_TIMER:
2083                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2084                              (u64 __user *)reg->addr);
2085                 break;
2086         case KVM_REG_S390_CLOCK_COMP:
2087                 r = put_user(vcpu->arch.sie_block->ckc,
2088                              (u64 __user *)reg->addr);
2089                 break;
2090         case KVM_REG_S390_PFTOKEN:
2091                 r = put_user(vcpu->arch.pfault_token,
2092                              (u64 __user *)reg->addr);
2093                 break;
2094         case KVM_REG_S390_PFCOMPARE:
2095                 r = put_user(vcpu->arch.pfault_compare,
2096                              (u64 __user *)reg->addr);
2097                 break;
2098         case KVM_REG_S390_PFSELECT:
2099                 r = put_user(vcpu->arch.pfault_select,
2100                              (u64 __user *)reg->addr);
2101                 break;
2102         case KVM_REG_S390_PP:
2103                 r = put_user(vcpu->arch.sie_block->pp,
2104                              (u64 __user *)reg->addr);
2105                 break;
2106         case KVM_REG_S390_GBEA:
2107                 r = put_user(vcpu->arch.sie_block->gbea,
2108                              (u64 __user *)reg->addr);
2109                 break;
2110         default:
2111                 break;
2112         }
2113
2114         return r;
2115 }
2116
2117 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2118                                            struct kvm_one_reg *reg)
2119 {
2120         int r = -EINVAL;
2121         __u64 val;
2122
2123         switch (reg->id) {
2124         case KVM_REG_S390_TODPR:
2125                 r = get_user(vcpu->arch.sie_block->todpr,
2126                              (u32 __user *)reg->addr);
2127                 break;
2128         case KVM_REG_S390_EPOCHDIFF:
2129                 r = get_user(vcpu->arch.sie_block->epoch,
2130                              (u64 __user *)reg->addr);
2131                 break;
2132         case KVM_REG_S390_CPU_TIMER:
2133                 r = get_user(val, (u64 __user *)reg->addr);
2134                 if (!r)
2135                         kvm_s390_set_cpu_timer(vcpu, val);
2136                 break;
2137         case KVM_REG_S390_CLOCK_COMP:
2138                 r = get_user(vcpu->arch.sie_block->ckc,
2139                              (u64 __user *)reg->addr);
2140                 break;
2141         case KVM_REG_S390_PFTOKEN:
2142                 r = get_user(vcpu->arch.pfault_token,
2143                              (u64 __user *)reg->addr);
2144                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2145                         kvm_clear_async_pf_completion_queue(vcpu);
2146                 break;
2147         case KVM_REG_S390_PFCOMPARE:
2148                 r = get_user(vcpu->arch.pfault_compare,
2149                              (u64 __user *)reg->addr);
2150                 break;
2151         case KVM_REG_S390_PFSELECT:
2152                 r = get_user(vcpu->arch.pfault_select,
2153                              (u64 __user *)reg->addr);
2154                 break;
2155         case KVM_REG_S390_PP:
2156                 r = get_user(vcpu->arch.sie_block->pp,
2157                              (u64 __user *)reg->addr);
2158                 break;
2159         case KVM_REG_S390_GBEA:
2160                 r = get_user(vcpu->arch.sie_block->gbea,
2161                              (u64 __user *)reg->addr);
2162                 break;
2163         default:
2164                 break;
2165         }
2166
2167         return r;
2168 }
2169
2170 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2171 {
2172         kvm_s390_vcpu_initial_reset(vcpu);
2173         return 0;
2174 }
2175
2176 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2177 {
2178         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2179         return 0;
2180 }
2181
2182 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2183 {
2184         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2185         return 0;
2186 }
2187
2188 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2189                                   struct kvm_sregs *sregs)
2190 {
2191         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2192         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2193         restore_access_regs(vcpu->run->s.regs.acrs);
2194         return 0;
2195 }
2196
2197 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2198                                   struct kvm_sregs *sregs)
2199 {
2200         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2201         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2202         return 0;
2203 }
2204
2205 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2206 {
2207         /* make sure the new values will be lazily loaded */
2208         save_fpu_regs();
2209         if (test_fp_ctl(fpu->fpc))
2210                 return -EINVAL;
2211         current->thread.fpu.fpc = fpu->fpc;
2212         if (MACHINE_HAS_VX)
2213                 convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
2214         else
2215                 memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
2216         return 0;
2217 }
2218
2219 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2220 {
2221         /* make sure we have the latest values */
2222         save_fpu_regs();
2223         if (MACHINE_HAS_VX)
2224                 convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
2225         else
2226                 memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
2227         fpu->fpc = current->thread.fpu.fpc;
2228         return 0;
2229 }
2230
2231 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2232 {
2233         int rc = 0;
2234
2235         if (!is_vcpu_stopped(vcpu))
2236                 rc = -EBUSY;
2237         else {
2238                 vcpu->run->psw_mask = psw.mask;
2239                 vcpu->run->psw_addr = psw.addr;
2240         }
2241         return rc;
2242 }
2243
2244 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2245                                   struct kvm_translation *tr)
2246 {
2247         return -EINVAL; /* not implemented yet */
2248 }
2249
2250 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2251                               KVM_GUESTDBG_USE_HW_BP | \
2252                               KVM_GUESTDBG_ENABLE)
2253
2254 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2255                                         struct kvm_guest_debug *dbg)
2256 {
2257         int rc = 0;
2258
2259         vcpu->guest_debug = 0;
2260         kvm_s390_clear_bp_data(vcpu);
2261
2262         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2263                 return -EINVAL;
2264         if (!sclp.has_gpere)
2265                 return -EINVAL;
2266
2267         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2268                 vcpu->guest_debug = dbg->control;
2269                 /* enforce guest PER */
2270                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2271
2272                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2273                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2274         } else {
2275                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2276                 vcpu->arch.guestdbg.last_bp = 0;
2277         }
2278
2279         if (rc) {
2280                 vcpu->guest_debug = 0;
2281                 kvm_s390_clear_bp_data(vcpu);
2282                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2283         }
2284
2285         return rc;
2286 }
2287
2288 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2289                                     struct kvm_mp_state *mp_state)
2290 {
2291         /* CHECK_STOP and LOAD are not supported yet */
2292         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2293                                        KVM_MP_STATE_OPERATING;
2294 }
2295
2296 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2297                                     struct kvm_mp_state *mp_state)
2298 {
2299         int rc = 0;
2300
2301         /* user space knows about this interface - let it control the state */
2302         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2303
2304         switch (mp_state->mp_state) {
2305         case KVM_MP_STATE_STOPPED:
2306                 kvm_s390_vcpu_stop(vcpu);
2307                 break;
2308         case KVM_MP_STATE_OPERATING:
2309                 kvm_s390_vcpu_start(vcpu);
2310                 break;
2311         case KVM_MP_STATE_LOAD:
2312         case KVM_MP_STATE_CHECK_STOP:
2313                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2314         default:
2315                 rc = -ENXIO;
2316         }
2317
2318         return rc;
2319 }
2320
2321 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2322 {
2323         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2324 }
2325
2326 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2327 {
2328 retry:
2329         kvm_s390_vcpu_request_handled(vcpu);
2330         if (!vcpu->requests)
2331                 return 0;
2332         /*
2333          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2334          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2335          * This ensures that the ipte instruction for this request has
2336          * already finished. We might race against a second unmapper that
2337          * wants to set the blocking bit. Lets just retry the request loop.
2338          */
2339         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2340                 int rc;
2341                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2342                                           kvm_s390_get_prefix(vcpu),
2343                                           PAGE_SIZE * 2, PROT_WRITE);
2344                 if (rc)
2345                         return rc;
2346                 goto retry;
2347         }
2348
2349         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2350                 vcpu->arch.sie_block->ihcpu = 0xffff;
2351                 goto retry;
2352         }
2353
2354         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2355                 if (!ibs_enabled(vcpu)) {
2356                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2357                         atomic_or(CPUSTAT_IBS,
2358                                         &vcpu->arch.sie_block->cpuflags);
2359                 }
2360                 goto retry;
2361         }
2362
2363         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2364                 if (ibs_enabled(vcpu)) {
2365                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2366                         atomic_andnot(CPUSTAT_IBS,
2367                                           &vcpu->arch.sie_block->cpuflags);
2368                 }
2369                 goto retry;
2370         }
2371
2372         /* nothing to do, just clear the request */
2373         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2374
2375         return 0;
2376 }
2377
2378 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2379 {
2380         struct kvm_vcpu *vcpu;
2381         int i;
2382
2383         mutex_lock(&kvm->lock);
2384         preempt_disable();
2385         kvm->arch.epoch = tod - get_tod_clock();
2386         kvm_s390_vcpu_block_all(kvm);
2387         kvm_for_each_vcpu(i, vcpu, kvm)
2388                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2389         kvm_s390_vcpu_unblock_all(kvm);
2390         preempt_enable();
2391         mutex_unlock(&kvm->lock);
2392 }
2393
2394 /**
2395  * kvm_arch_fault_in_page - fault-in guest page if necessary
2396  * @vcpu: The corresponding virtual cpu
2397  * @gpa: Guest physical address
2398  * @writable: Whether the page should be writable or not
2399  *
2400  * Make sure that a guest page has been faulted-in on the host.
2401  *
2402  * Return: Zero on success, negative error code otherwise.
2403  */
2404 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2405 {
2406         return gmap_fault(vcpu->arch.gmap, gpa,
2407                           writable ? FAULT_FLAG_WRITE : 0);
2408 }
2409
2410 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2411                                       unsigned long token)
2412 {
2413         struct kvm_s390_interrupt inti;
2414         struct kvm_s390_irq irq;
2415
2416         if (start_token) {
2417                 irq.u.ext.ext_params2 = token;
2418                 irq.type = KVM_S390_INT_PFAULT_INIT;
2419                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2420         } else {
2421                 inti.type = KVM_S390_INT_PFAULT_DONE;
2422                 inti.parm64 = token;
2423                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2424         }
2425 }
2426
2427 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2428                                      struct kvm_async_pf *work)
2429 {
2430         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2431         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2432 }
2433
2434 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2435                                  struct kvm_async_pf *work)
2436 {
2437         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2438         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2439 }
2440
2441 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2442                                struct kvm_async_pf *work)
2443 {
2444         /* s390 will always inject the page directly */
2445 }
2446
2447 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2448 {
2449         /*
2450          * s390 will always inject the page directly,
2451          * but we still want check_async_completion to cleanup
2452          */
2453         return true;
2454 }
2455
2456 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2457 {
2458         hva_t hva;
2459         struct kvm_arch_async_pf arch;
2460         int rc;
2461
2462         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2463                 return 0;
2464         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2465             vcpu->arch.pfault_compare)
2466                 return 0;
2467         if (psw_extint_disabled(vcpu))
2468                 return 0;
2469         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2470                 return 0;
2471         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2472                 return 0;
2473         if (!vcpu->arch.gmap->pfault_enabled)
2474                 return 0;
2475
2476         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2477         hva += current->thread.gmap_addr & ~PAGE_MASK;
2478         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2479                 return 0;
2480
2481         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2482         return rc;
2483 }
2484
2485 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2486 {
2487         int rc, cpuflags;
2488
2489         /*
2490          * On s390 notifications for arriving pages will be delivered directly
2491          * to the guest but the house keeping for completed pfaults is
2492          * handled outside the worker.
2493          */
2494         kvm_check_async_pf_completion(vcpu);
2495
2496         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2497         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2498
2499         if (need_resched())
2500                 schedule();
2501
2502         if (test_cpu_flag(CIF_MCCK_PENDING))
2503                 s390_handle_mcck();
2504
2505         if (!kvm_is_ucontrol(vcpu->kvm)) {
2506                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2507                 if (rc)
2508                         return rc;
2509         }
2510
2511         rc = kvm_s390_handle_requests(vcpu);
2512         if (rc)
2513                 return rc;
2514
2515         if (guestdbg_enabled(vcpu)) {
2516                 kvm_s390_backup_guest_per_regs(vcpu);
2517                 kvm_s390_patch_guest_per_regs(vcpu);
2518         }
2519
2520         vcpu->arch.sie_block->icptcode = 0;
2521         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2522         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2523         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2524
2525         return 0;
2526 }
2527
2528 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2529 {
2530         struct kvm_s390_pgm_info pgm_info = {
2531                 .code = PGM_ADDRESSING,
2532         };
2533         u8 opcode, ilen;
2534         int rc;
2535
2536         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2537         trace_kvm_s390_sie_fault(vcpu);
2538
2539         /*
2540          * We want to inject an addressing exception, which is defined as a
2541          * suppressing or terminating exception. However, since we came here
2542          * by a DAT access exception, the PSW still points to the faulting
2543          * instruction since DAT exceptions are nullifying. So we've got
2544          * to look up the current opcode to get the length of the instruction
2545          * to be able to forward the PSW.
2546          */
2547         rc = read_guest_instr(vcpu, &opcode, 1);
2548         ilen = insn_length(opcode);
2549         if (rc < 0) {
2550                 return rc;
2551         } else if (rc) {
2552                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2553                  * Forward by arbitrary ilc, injection will take care of
2554                  * nullification if necessary.
2555                  */
2556                 pgm_info = vcpu->arch.pgm;
2557                 ilen = 4;
2558         }
2559         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2560         kvm_s390_forward_psw(vcpu, ilen);
2561         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2562 }
2563
2564 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2565 {
2566         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2567                    vcpu->arch.sie_block->icptcode);
2568         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2569
2570         if (guestdbg_enabled(vcpu))
2571                 kvm_s390_restore_guest_per_regs(vcpu);
2572
2573         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2574         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2575
2576         if (vcpu->arch.sie_block->icptcode > 0) {
2577                 int rc = kvm_handle_sie_intercept(vcpu);
2578
2579                 if (rc != -EOPNOTSUPP)
2580                         return rc;
2581                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2582                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2583                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2584                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2585                 return -EREMOTE;
2586         } else if (exit_reason != -EFAULT) {
2587                 vcpu->stat.exit_null++;
2588                 return 0;
2589         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2590                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2591                 vcpu->run->s390_ucontrol.trans_exc_code =
2592                                                 current->thread.gmap_addr;
2593                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2594                 return -EREMOTE;
2595         } else if (current->thread.gmap_pfault) {
2596                 trace_kvm_s390_major_guest_pfault(vcpu);
2597                 current->thread.gmap_pfault = 0;
2598                 if (kvm_arch_setup_async_pf(vcpu))
2599                         return 0;
2600                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2601         }
2602         return vcpu_post_run_fault_in_sie(vcpu);
2603 }
2604
2605 static int __vcpu_run(struct kvm_vcpu *vcpu)
2606 {
2607         int rc, exit_reason;
2608
2609         /*
2610          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2611          * ning the guest), so that memslots (and other stuff) are protected
2612          */
2613         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2614
2615         do {
2616                 rc = vcpu_pre_run(vcpu);
2617                 if (rc)
2618                         break;
2619
2620                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2621                 /*
2622                  * As PF_VCPU will be used in fault handler, between
2623                  * guest_enter and guest_exit should be no uaccess.
2624                  */
2625                 local_irq_disable();
2626                 __kvm_guest_enter();
2627                 __disable_cpu_timer_accounting(vcpu);
2628                 local_irq_enable();
2629                 exit_reason = sie64a(vcpu->arch.sie_block,
2630                                      vcpu->run->s.regs.gprs);
2631                 local_irq_disable();
2632                 __enable_cpu_timer_accounting(vcpu);
2633                 __kvm_guest_exit();
2634                 local_irq_enable();
2635                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2636
2637                 rc = vcpu_post_run(vcpu, exit_reason);
2638         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2639
2640         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2641         return rc;
2642 }
2643
2644 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2645 {
2646         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2647         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2648         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2649                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2650         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2651                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2652                 /* some control register changes require a tlb flush */
2653                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2654         }
2655         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2656                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2657                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2658                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2659                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2660                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2661         }
2662         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2663                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2664                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2665                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2666                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2667                         kvm_clear_async_pf_completion_queue(vcpu);
2668         }
2669         kvm_run->kvm_dirty_regs = 0;
2670 }
2671
2672 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2673 {
2674         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2675         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2676         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2677         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2678         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2679         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2680         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2681         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2682         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2683         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2684         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2685         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2686 }
2687
2688 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2689 {
2690         int rc;
2691         sigset_t sigsaved;
2692
2693         if (guestdbg_exit_pending(vcpu)) {
2694                 kvm_s390_prepare_debug_exit(vcpu);
2695                 return 0;
2696         }
2697
2698         if (vcpu->sigset_active)
2699                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2700
2701         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2702                 kvm_s390_vcpu_start(vcpu);
2703         } else if (is_vcpu_stopped(vcpu)) {
2704                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2705                                    vcpu->vcpu_id);
2706                 return -EINVAL;
2707         }
2708
2709         sync_regs(vcpu, kvm_run);
2710         enable_cpu_timer_accounting(vcpu);
2711
2712         might_fault();
2713         rc = __vcpu_run(vcpu);
2714
2715         if (signal_pending(current) && !rc) {
2716                 kvm_run->exit_reason = KVM_EXIT_INTR;
2717                 rc = -EINTR;
2718         }
2719
2720         if (guestdbg_exit_pending(vcpu) && !rc)  {
2721                 kvm_s390_prepare_debug_exit(vcpu);
2722                 rc = 0;
2723         }
2724
2725         if (rc == -EREMOTE) {
2726                 /* userspace support is needed, kvm_run has been prepared */
2727                 rc = 0;
2728         }
2729
2730         disable_cpu_timer_accounting(vcpu);
2731         store_regs(vcpu, kvm_run);
2732
2733         if (vcpu->sigset_active)
2734                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2735
2736         vcpu->stat.exit_userspace++;
2737         return rc;
2738 }
2739
2740 /*
2741  * store status at address
2742  * we use have two special cases:
2743  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2744  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2745  */
2746 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2747 {
2748         unsigned char archmode = 1;
2749         freg_t fprs[NUM_FPRS];
2750         unsigned int px;
2751         u64 clkcomp, cputm;
2752         int rc;
2753
2754         px = kvm_s390_get_prefix(vcpu);
2755         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2756                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2757                         return -EFAULT;
2758                 gpa = 0;
2759         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2760                 if (write_guest_real(vcpu, 163, &archmode, 1))
2761                         return -EFAULT;
2762                 gpa = px;
2763         } else
2764                 gpa -= __LC_FPREGS_SAVE_AREA;
2765
2766         /* manually convert vector registers if necessary */
2767         if (MACHINE_HAS_VX) {
2768                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2769                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2770                                      fprs, 128);
2771         } else {
2772                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2773                                      vcpu->run->s.regs.fprs, 128);
2774         }
2775         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2776                               vcpu->run->s.regs.gprs, 128);
2777         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2778                               &vcpu->arch.sie_block->gpsw, 16);
2779         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2780                               &px, 4);
2781         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2782                               &vcpu->run->s.regs.fpc, 4);
2783         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2784                               &vcpu->arch.sie_block->todpr, 4);
2785         cputm = kvm_s390_get_cpu_timer(vcpu);
2786         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2787                               &cputm, 8);
2788         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2789         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2790                               &clkcomp, 8);
2791         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2792                               &vcpu->run->s.regs.acrs, 64);
2793         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2794                               &vcpu->arch.sie_block->gcr, 128);
2795         return rc ? -EFAULT : 0;
2796 }
2797
2798 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2799 {
2800         /*
2801          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2802          * copying in vcpu load/put. Lets update our copies before we save
2803          * it into the save area
2804          */
2805         save_fpu_regs();
2806         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2807         save_access_regs(vcpu->run->s.regs.acrs);
2808
2809         return kvm_s390_store_status_unloaded(vcpu, addr);
2810 }
2811
2812 /*
2813  * store additional status at address
2814  */
2815 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2816                                         unsigned long gpa)
2817 {
2818         /* Only bits 0-53 are used for address formation */
2819         if (!(gpa & ~0x3ff))
2820                 return 0;
2821
2822         return write_guest_abs(vcpu, gpa & ~0x3ff,
2823                                (void *)&vcpu->run->s.regs.vrs, 512);
2824 }
2825
2826 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2827 {
2828         if (!test_kvm_facility(vcpu->kvm, 129))
2829                 return 0;
2830
2831         /*
2832          * The guest VXRS are in the host VXRs due to the lazy
2833          * copying in vcpu load/put. We can simply call save_fpu_regs()
2834          * to save the current register state because we are in the
2835          * middle of a load/put cycle.
2836          *
2837          * Let's update our copies before we save it into the save area.
2838          */
2839         save_fpu_regs();
2840
2841         return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2842 }
2843
2844 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2845 {
2846         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2847         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2848 }
2849
2850 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2851 {
2852         unsigned int i;
2853         struct kvm_vcpu *vcpu;
2854
2855         kvm_for_each_vcpu(i, vcpu, kvm) {
2856                 __disable_ibs_on_vcpu(vcpu);
2857         }
2858 }
2859
2860 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2861 {
2862         if (!sclp.has_ibs)
2863                 return;
2864         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2865         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2866 }
2867
2868 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2869 {
2870         int i, online_vcpus, started_vcpus = 0;
2871
2872         if (!is_vcpu_stopped(vcpu))
2873                 return;
2874
2875         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2876         /* Only one cpu at a time may enter/leave the STOPPED state. */
2877         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2878         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2879
2880         for (i = 0; i < online_vcpus; i++) {
2881                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2882                         started_vcpus++;
2883         }
2884
2885         if (started_vcpus == 0) {
2886                 /* we're the only active VCPU -> speed it up */
2887                 __enable_ibs_on_vcpu(vcpu);
2888         } else if (started_vcpus == 1) {
2889                 /*
2890                  * As we are starting a second VCPU, we have to disable
2891                  * the IBS facility on all VCPUs to remove potentially
2892                  * oustanding ENABLE requests.
2893                  */
2894                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2895         }
2896
2897         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2898         /*
2899          * Another VCPU might have used IBS while we were offline.
2900          * Let's play safe and flush the VCPU at startup.
2901          */
2902         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2903         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2904         return;
2905 }
2906
2907 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2908 {
2909         int i, online_vcpus, started_vcpus = 0;
2910         struct kvm_vcpu *started_vcpu = NULL;
2911
2912         if (is_vcpu_stopped(vcpu))
2913                 return;
2914
2915         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2916         /* Only one cpu at a time may enter/leave the STOPPED state. */
2917         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2918         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2919
2920         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2921         kvm_s390_clear_stop_irq(vcpu);
2922
2923         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2924         __disable_ibs_on_vcpu(vcpu);
2925
2926         for (i = 0; i < online_vcpus; i++) {
2927                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2928                         started_vcpus++;
2929                         started_vcpu = vcpu->kvm->vcpus[i];
2930                 }
2931         }
2932
2933         if (started_vcpus == 1) {
2934                 /*
2935                  * As we only have one VCPU left, we want to enable the
2936                  * IBS facility for that VCPU to speed it up.
2937                  */
2938                 __enable_ibs_on_vcpu(started_vcpu);
2939         }
2940
2941         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2942         return;
2943 }
2944
2945 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2946                                      struct kvm_enable_cap *cap)
2947 {
2948         int r;
2949
2950         if (cap->flags)
2951                 return -EINVAL;
2952
2953         switch (cap->cap) {
2954         case KVM_CAP_S390_CSS_SUPPORT:
2955                 if (!vcpu->kvm->arch.css_support) {
2956                         vcpu->kvm->arch.css_support = 1;
2957                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2958                         trace_kvm_s390_enable_css(vcpu->kvm);
2959                 }
2960                 r = 0;
2961                 break;
2962         default:
2963                 r = -EINVAL;
2964                 break;
2965         }
2966         return r;
2967 }
2968
2969 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2970                                   struct kvm_s390_mem_op *mop)
2971 {
2972         void __user *uaddr = (void __user *)mop->buf;
2973         void *tmpbuf = NULL;
2974         int r, srcu_idx;
2975         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2976                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
2977
2978         if (mop->flags & ~supported_flags)
2979                 return -EINVAL;
2980
2981         if (mop->size > MEM_OP_MAX_SIZE)
2982                 return -E2BIG;
2983
2984         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2985                 tmpbuf = vmalloc(mop->size);
2986                 if (!tmpbuf)
2987                         return -ENOMEM;
2988         }
2989
2990         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2991
2992         switch (mop->op) {
2993         case KVM_S390_MEMOP_LOGICAL_READ:
2994                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2995                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2996                                             mop->size, GACC_FETCH);
2997                         break;
2998                 }
2999                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3000                 if (r == 0) {
3001                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3002                                 r = -EFAULT;
3003                 }
3004                 break;
3005         case KVM_S390_MEMOP_LOGICAL_WRITE:
3006                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3007                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3008                                             mop->size, GACC_STORE);
3009                         break;
3010                 }
3011                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3012                         r = -EFAULT;
3013                         break;
3014                 }
3015                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3016                 break;
3017         default:
3018                 r = -EINVAL;
3019         }
3020
3021         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3022
3023         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3024                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3025
3026         vfree(tmpbuf);
3027         return r;
3028 }
3029
3030 long kvm_arch_vcpu_ioctl(struct file *filp,
3031                          unsigned int ioctl, unsigned long arg)
3032 {
3033         struct kvm_vcpu *vcpu = filp->private_data;
3034         void __user *argp = (void __user *)arg;
3035         int idx;
3036         long r;
3037
3038         switch (ioctl) {
3039         case KVM_S390_IRQ: {
3040                 struct kvm_s390_irq s390irq;
3041
3042                 r = -EFAULT;
3043                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3044                         break;
3045                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3046                 break;
3047         }
3048         case KVM_S390_INTERRUPT: {
3049                 struct kvm_s390_interrupt s390int;
3050                 struct kvm_s390_irq s390irq;
3051
3052                 r = -EFAULT;
3053                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3054                         break;
3055                 if (s390int_to_s390irq(&s390int, &s390irq))
3056                         return -EINVAL;
3057                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3058                 break;
3059         }
3060         case KVM_S390_STORE_STATUS:
3061                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3062                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3063                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3064                 break;
3065         case KVM_S390_SET_INITIAL_PSW: {
3066                 psw_t psw;
3067
3068                 r = -EFAULT;
3069                 if (copy_from_user(&psw, argp, sizeof(psw)))
3070                         break;
3071                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3072                 break;
3073         }
3074         case KVM_S390_INITIAL_RESET:
3075                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3076                 break;
3077         case KVM_SET_ONE_REG:
3078         case KVM_GET_ONE_REG: {
3079                 struct kvm_one_reg reg;
3080                 r = -EFAULT;
3081                 if (copy_from_user(&reg, argp, sizeof(reg)))
3082                         break;
3083                 if (ioctl == KVM_SET_ONE_REG)
3084                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3085                 else
3086                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3087                 break;
3088         }
3089 #ifdef CONFIG_KVM_S390_UCONTROL
3090         case KVM_S390_UCAS_MAP: {
3091                 struct kvm_s390_ucas_mapping ucasmap;
3092
3093                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3094                         r = -EFAULT;
3095                         break;
3096                 }
3097
3098                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3099                         r = -EINVAL;
3100                         break;
3101                 }
3102
3103                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3104                                      ucasmap.vcpu_addr, ucasmap.length);
3105                 break;
3106         }
3107         case KVM_S390_UCAS_UNMAP: {
3108                 struct kvm_s390_ucas_mapping ucasmap;
3109
3110                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3111                         r = -EFAULT;
3112                         break;
3113                 }
3114
3115                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3116                         r = -EINVAL;
3117                         break;
3118                 }
3119
3120                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3121                         ucasmap.length);
3122                 break;
3123         }
3124 #endif
3125         case KVM_S390_VCPU_FAULT: {
3126                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3127                 break;
3128         }
3129         case KVM_ENABLE_CAP:
3130         {
3131                 struct kvm_enable_cap cap;
3132                 r = -EFAULT;
3133                 if (copy_from_user(&cap, argp, sizeof(cap)))
3134                         break;
3135                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3136                 break;
3137         }
3138         case KVM_S390_MEM_OP: {
3139                 struct kvm_s390_mem_op mem_op;
3140
3141                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3142                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3143                 else
3144                         r = -EFAULT;
3145                 break;
3146         }
3147         case KVM_S390_SET_IRQ_STATE: {
3148                 struct kvm_s390_irq_state irq_state;
3149
3150                 r = -EFAULT;
3151                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3152                         break;
3153                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3154                     irq_state.len == 0 ||
3155                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3156                         r = -EINVAL;
3157                         break;
3158                 }
3159                 r = kvm_s390_set_irq_state(vcpu,
3160                                            (void __user *) irq_state.buf,
3161                                            irq_state.len);
3162                 break;
3163         }
3164         case KVM_S390_GET_IRQ_STATE: {
3165                 struct kvm_s390_irq_state irq_state;
3166
3167                 r = -EFAULT;
3168                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3169                         break;
3170                 if (irq_state.len == 0) {
3171                         r = -EINVAL;
3172                         break;
3173                 }
3174                 r = kvm_s390_get_irq_state(vcpu,
3175                                            (__u8 __user *)  irq_state.buf,
3176                                            irq_state.len);
3177                 break;
3178         }
3179         default:
3180                 r = -ENOTTY;
3181         }
3182         return r;
3183 }
3184
3185 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3186 {
3187 #ifdef CONFIG_KVM_S390_UCONTROL
3188         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3189                  && (kvm_is_ucontrol(vcpu->kvm))) {
3190                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3191                 get_page(vmf->page);
3192                 return 0;
3193         }
3194 #endif
3195         return VM_FAULT_SIGBUS;
3196 }
3197
3198 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3199                             unsigned long npages)
3200 {
3201         return 0;
3202 }
3203
3204 /* Section: memory related */
3205 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3206                                    struct kvm_memory_slot *memslot,
3207                                    const struct kvm_userspace_memory_region *mem,
3208                                    enum kvm_mr_change change)
3209 {
3210         /* A few sanity checks. We can have memory slots which have to be
3211            located/ended at a segment boundary (1MB). The memory in userland is
3212            ok to be fragmented into various different vmas. It is okay to mmap()
3213            and munmap() stuff in this slot after doing this call at any time */
3214
3215         if (mem->userspace_addr & 0xffffful)
3216                 return -EINVAL;
3217
3218         if (mem->memory_size & 0xffffful)
3219                 return -EINVAL;
3220
3221         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3222                 return -EINVAL;
3223
3224         return 0;
3225 }
3226
3227 void kvm_arch_commit_memory_region(struct kvm *kvm,
3228                                 const struct kvm_userspace_memory_region *mem,
3229                                 const struct kvm_memory_slot *old,
3230                                 const struct kvm_memory_slot *new,
3231                                 enum kvm_mr_change change)
3232 {
3233         int rc;
3234
3235         /* If the basics of the memslot do not change, we do not want
3236          * to update the gmap. Every update causes several unnecessary
3237          * segment translation exceptions. This is usually handled just
3238          * fine by the normal fault handler + gmap, but it will also
3239          * cause faults on the prefix page of running guest CPUs.
3240          */
3241         if (old->userspace_addr == mem->userspace_addr &&
3242             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3243             old->npages * PAGE_SIZE == mem->memory_size)
3244                 return;
3245
3246         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3247                 mem->guest_phys_addr, mem->memory_size);
3248         if (rc)
3249                 pr_warn("failed to commit memory region\n");
3250         return;
3251 }
3252
3253 static inline unsigned long nonhyp_mask(int i)
3254 {
3255         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3256
3257         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3258 }
3259
3260 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3261 {
3262         vcpu->valid_wakeup = false;
3263 }
3264
3265 static int __init kvm_s390_init(void)
3266 {
3267         int i;
3268
3269         if (!sclp.has_sief2) {
3270                 pr_info("SIE not available\n");
3271                 return -ENODEV;
3272         }
3273
3274         for (i = 0; i < 16; i++)
3275                 kvm_s390_fac_list_mask[i] |=
3276                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3277
3278         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3279 }
3280
3281 static void __exit kvm_s390_exit(void)
3282 {
3283         kvm_exit();
3284 }
3285
3286 module_init(kvm_s390_init);
3287 module_exit(kvm_s390_exit);
3288
3289 /*
3290  * Enable autoloading of the kvm module.
3291  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3292  * since x86 takes a different approach.
3293  */
3294 #include <linux/miscdevice.h>
3295 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3296 MODULE_ALIAS("devname:kvm");