arch/s390/kvm/kvm-s390.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include "kvm-s390.h"
48 #include "gaccess.h"
49
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53
54 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57                            (KVM_MAX_VCPUS + LOCAL_IRQS))
58
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61
62 struct kvm_stats_debugfs_item debugfs_entries[] = {
63         { "userspace_handled", VCPU_STAT(exit_userspace) },
64         { "exit_null", VCPU_STAT(exit_null) },
65         { "exit_validity", VCPU_STAT(exit_validity) },
66         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
67         { "exit_external_request", VCPU_STAT(exit_external_request) },
68         { "exit_io_request", VCPU_STAT(exit_io_request) },
69         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70         { "exit_instruction", VCPU_STAT(exit_instruction) },
71         { "exit_pei", VCPU_STAT(exit_pei) },
72         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78         { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
79         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84         { "deliver_ckc", VCPU_STAT(deliver_ckc) },
85         { "deliver_cputm", VCPU_STAT(deliver_cputm) },
86         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
87         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
88         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
89         { "deliver_virtio", VCPU_STAT(deliver_virtio) },
90         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
91         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
92         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
93         { "deliver_program", VCPU_STAT(deliver_program) },
94         { "deliver_io", VCPU_STAT(deliver_io) },
95         { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
96         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
97         { "inject_ckc", VCPU_STAT(inject_ckc) },
98         { "inject_cputm", VCPU_STAT(inject_cputm) },
99         { "inject_external_call", VCPU_STAT(inject_external_call) },
100         { "inject_float_mchk", VM_STAT(inject_float_mchk) },
101         { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
102         { "inject_io", VM_STAT(inject_io) },
103         { "inject_mchk", VCPU_STAT(inject_mchk) },
104         { "inject_pfault_done", VM_STAT(inject_pfault_done) },
105         { "inject_program", VCPU_STAT(inject_program) },
106         { "inject_restart", VCPU_STAT(inject_restart) },
107         { "inject_service_signal", VM_STAT(inject_service_signal) },
108         { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
109         { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
110         { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
111         { "inject_virtio", VM_STAT(inject_virtio) },
112         { "instruction_epsw", VCPU_STAT(instruction_epsw) },
113         { "instruction_gs", VCPU_STAT(instruction_gs) },
114         { "instruction_io_other", VCPU_STAT(instruction_io_other) },
115         { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
116         { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
117         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
118         { "instruction_ptff", VCPU_STAT(instruction_ptff) },
119         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
120         { "instruction_sck", VCPU_STAT(instruction_sck) },
121         { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
122         { "instruction_spx", VCPU_STAT(instruction_spx) },
123         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
124         { "instruction_stap", VCPU_STAT(instruction_stap) },
125         { "instruction_iske", VCPU_STAT(instruction_iske) },
126         { "instruction_ri", VCPU_STAT(instruction_ri) },
127         { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
128         { "instruction_sske", VCPU_STAT(instruction_sske) },
129         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
130         { "instruction_essa", VCPU_STAT(instruction_essa) },
131         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
132         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
133         { "instruction_tb", VCPU_STAT(instruction_tb) },
134         { "instruction_tpi", VCPU_STAT(instruction_tpi) },
135         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
136         { "instruction_tsch", VCPU_STAT(instruction_tsch) },
137         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
138         { "instruction_sie", VCPU_STAT(instruction_sie) },
139         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
140         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
141         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
142         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
143         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
144         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
145         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
146         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
147         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
148         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
149         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
150         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
151         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
152         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
153         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
154         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
155         { "instruction_diag_10", VCPU_STAT(diagnose_10) },
156         { "instruction_diag_44", VCPU_STAT(diagnose_44) },
157         { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
158         { "instruction_diag_258", VCPU_STAT(diagnose_258) },
159         { "instruction_diag_308", VCPU_STAT(diagnose_308) },
160         { "instruction_diag_500", VCPU_STAT(diagnose_500) },
161         { "instruction_diag_other", VCPU_STAT(diagnose_other) },
162         { NULL }
163 };
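/*
 * Illustration (editorial note, not part of the original source): each entry
 * above pairs a debugfs file name with the offset of its counter and the kind
 * of object the counter lives in.  Given the VCPU_STAT()/VM_STAT() helpers
 * defined earlier, an entry such as
 *
 *	{ "exit_null", VCPU_STAT(exit_null) },
 *
 * expands to
 *
 *	{ "exit_null", offsetof(struct kvm_vcpu, stat.exit_null), KVM_STAT_VCPU },
 *
 * i.e. the generic KVM debugfs code can locate the counter by adding the
 * stored offset to the corresponding struct kvm_vcpu (or, for VM_STAT,
 * struct kvm) pointer.
 */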
164
165 struct kvm_s390_tod_clock_ext {
166         __u8 epoch_idx;
167         __u64 tod;
168         __u8 reserved[7];
169 } __packed;
170
171 /* allow nested virtualization in KVM (if enabled by user space) */
172 static int nested;
173 module_param(nested, int, S_IRUGO);
174 MODULE_PARM_DESC(nested, "Nested virtualization support");
175
176 /* allow 1m huge page guest backing, if !nested */
177 static int hpage;
178 module_param(hpage, int, 0444);
179 MODULE_PARM_DESC(hpage, "1m huge page backing support");
180
181 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
182 static u8 halt_poll_max_steal = 10;
183 module_param(halt_poll_max_steal, byte, 0644);
184 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
185
186 /*
187  * For now we handle at most 16 double words as this is what the s390 base
188  * kernel handles and stores in the prefix page. If we ever need to go beyond
189  * this, code changes are required, but the external uapi can stay.
190  */
191 #define SIZE_INTERNAL 16
192
193 /*
194  * Base feature mask that defines default mask for facilities. Consists of the
195  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
196  */
197 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
198 /*
199  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
200  * and defines the facilities that can be enabled via a cpu model.
201  */
202 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
203
204 static unsigned long kvm_s390_fac_size(void)
205 {
206         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
207         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
208         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
209                 sizeof(S390_lowcore.stfle_fac_list));
210
211         return SIZE_INTERNAL;
212 }
213
214 /* available cpu features supported by kvm */
215 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
216 /* available subfunctions indicated via query / "test bit" */
217 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
218
219 static struct gmap_notifier gmap_notifier;
220 static struct gmap_notifier vsie_gmap_notifier;
221 debug_info_t *kvm_s390_dbf;
222
223 /* Section: not file related */
224 int kvm_arch_hardware_enable(void)
225 {
226         /* every s390 is virtualization enabled ;-) */
227         return 0;
228 }
229
230 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
231                               unsigned long end);
232
233 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
234 {
235         u8 delta_idx = 0;
236
237         /*
238          * The TOD jumps by delta, we have to compensate this by adding
239          * -delta to the epoch.
240          */
241         delta = -delta;
242
243         /* sign-extension - we're adding to signed values below */
244         if ((s64)delta < 0)
245                 delta_idx = -1;
246
247         scb->epoch += delta;
248         if (scb->ecd & ECD_MEF) {
249                 scb->epdx += delta_idx;
250                 if (scb->epoch < delta)
251                         scb->epdx += 1;
252         }
253 }
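/*
 * Editorial note (illustration only): the function above is effectively a
 * signed 128-bit addition on the (epdx:epoch) pair, carried out with 64-bit
 * operations.  A minimal sketch of the same idea on plain integers, using
 * the hypothetical names hi/lo for the two halves and delta for the
 * (already negated) adjustment:
 *
 *	u64 lo, delta;	// low 64 bits, value to add
 *	u8  hi;		// high 8 bits (multiple-epoch index)
 *
 *	hi += (s64)delta < 0 ? 0xff : 0;	// sign-extend delta into the high part
 *	lo += delta;
 *	if (lo < delta)				// unsigned overflow of the low half
 *		hi += 1;			// propagate the carry
 *
 * Worked example with delta = -1 (the TOD jumped forward by 1): the low half
 * becomes lo - 1; unless lo was 0 the carry (+1) cancels the 0xff that was
 * just added and the high part is unchanged; if lo was 0 there is no carry
 * and the high part is decremented by one, i.e. a proper 128-bit borrow.
 */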
254
255 /*
256  * This callback is executed during stop_machine(). All CPUs are therefore
257  * temporarily stopped. In order not to change guest behavior, we have to
258  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
259  * so a CPU won't be stopped while calculating with the epoch.
260  */
261 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
262                           void *v)
263 {
264         struct kvm *kvm;
265         struct kvm_vcpu *vcpu;
266         int i;
267         unsigned long long *delta = v;
268
269         list_for_each_entry(kvm, &vm_list, vm_list) {
270                 kvm_for_each_vcpu(i, vcpu, kvm) {
271                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
272                         if (i == 0) {
273                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
274                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
275                         }
276                         if (vcpu->arch.cputm_enabled)
277                                 vcpu->arch.cputm_start += *delta;
278                         if (vcpu->arch.vsie_block)
279                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
280                                                    *delta);
281                 }
282         }
283         return NOTIFY_OK;
284 }
285
286 static struct notifier_block kvm_clock_notifier = {
287         .notifier_call = kvm_clock_sync,
288 };
289
290 int kvm_arch_hardware_setup(void)
291 {
292         gmap_notifier.notifier_call = kvm_gmap_notifier;
293         gmap_register_pte_notifier(&gmap_notifier);
294         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
295         gmap_register_pte_notifier(&vsie_gmap_notifier);
296         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
297                                        &kvm_clock_notifier);
298         return 0;
299 }
300
301 void kvm_arch_hardware_unsetup(void)
302 {
303         gmap_unregister_pte_notifier(&gmap_notifier);
304         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
305         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
306                                          &kvm_clock_notifier);
307 }
308
309 static void allow_cpu_feat(unsigned long nr)
310 {
311         set_bit_inv(nr, kvm_s390_available_cpu_feat);
312 }
313
314 static inline int plo_test_bit(unsigned char nr)
315 {
316         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
317         int cc;
318
319         asm volatile(
320                 /* Parameter registers are ignored for "test bit" */
321                 "       plo     0,0,0,0(0)\n"
322                 "       ipm     %0\n"
323                 "       srl     %0,28\n"
324                 : "=d" (cc)
325                 : "d" (r0)
326                 : "cc");
327         return cc == 0;
328 }
329
330 static inline void __insn32_query(unsigned int opcode, u8 query[32])
331 {
332         register unsigned long r0 asm("0") = 0; /* query function */
333         register unsigned long r1 asm("1") = (unsigned long) query;
334
335         asm volatile(
336                 /* Parameter regs are ignored */
337                 "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
338                 : "=m" (*query)
339                 : "d" (r0), "a" (r1), [opc] "i" (opcode)
340                 : "cc");
341 }
342
343 #define INSN_SORTL 0xb938
344 #define INSN_DFLTCC 0xb939
345
346 static void kvm_s390_cpu_feat_init(void)
347 {
348         int i;
349
350         for (i = 0; i < 256; ++i) {
351                 if (plo_test_bit(i))
352                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
353         }
354
355         if (test_facility(28)) /* TOD-clock steering */
356                 ptff(kvm_s390_available_subfunc.ptff,
357                      sizeof(kvm_s390_available_subfunc.ptff),
358                      PTFF_QAF);
359
360         if (test_facility(17)) { /* MSA */
361                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
362                               kvm_s390_available_subfunc.kmac);
363                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
364                               kvm_s390_available_subfunc.kmc);
365                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
366                               kvm_s390_available_subfunc.km);
367                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
368                               kvm_s390_available_subfunc.kimd);
369                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
370                               kvm_s390_available_subfunc.klmd);
371         }
372         if (test_facility(76)) /* MSA3 */
373                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
374                               kvm_s390_available_subfunc.pckmo);
375         if (test_facility(77)) { /* MSA4 */
376                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
377                               kvm_s390_available_subfunc.kmctr);
378                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
379                               kvm_s390_available_subfunc.kmf);
380                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
381                               kvm_s390_available_subfunc.kmo);
382                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
383                               kvm_s390_available_subfunc.pcc);
384         }
385         if (test_facility(57)) /* MSA5 */
386                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
387                               kvm_s390_available_subfunc.ppno);
388
389         if (test_facility(146)) /* MSA8 */
390                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
391                               kvm_s390_available_subfunc.kma);
392
393         if (test_facility(155)) /* MSA9 */
394                 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
395                               kvm_s390_available_subfunc.kdsa);
396
397         if (test_facility(150)) /* SORTL */
398                 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
399
400         if (test_facility(151)) /* DFLTCC */
401                 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
402
403         if (MACHINE_HAS_ESOP)
404                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
405         /*
406          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
407          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
408          */
409         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
410             !test_facility(3) || !nested)
411                 return;
412         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
413         if (sclp.has_64bscao)
414                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
415         if (sclp.has_siif)
416                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
417         if (sclp.has_gpere)
418                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
419         if (sclp.has_gsls)
420                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
421         if (sclp.has_ib)
422                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
423         if (sclp.has_cei)
424                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
425         if (sclp.has_ibs)
426                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
427         if (sclp.has_kss)
428                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
429         /*
430          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
431          * all skey handling functions read/set the skey from the PGSTE
432          * instead of the real storage key.
433          *
434  * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
435  * pages to be detected as preserved although they are resident.
436          *
437          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
438          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
439          *
440          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
441          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
442          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
443          *
444          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
445          * cannot easily shadow the SCA because of the ipte lock.
446          */
447 }
448
449 int kvm_arch_init(void *opaque)
450 {
451         int rc;
452
453         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
454         if (!kvm_s390_dbf)
455                 return -ENOMEM;
456
457         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
458                 rc = -ENOMEM;
459                 goto out_debug_unreg;
460         }
461
462         kvm_s390_cpu_feat_init();
463
464         /* Register floating interrupt controller interface. */
465         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
466         if (rc) {
467                 pr_err("A FLIC registration call failed with rc=%d\n", rc);
468                 goto out_debug_unreg;
469         }
470
471         rc = kvm_s390_gib_init(GAL_ISC);
472         if (rc)
473                 goto out_gib_destroy;
474
475         return 0;
476
477 out_gib_destroy:
478         kvm_s390_gib_destroy();
479 out_debug_unreg:
480         debug_unregister(kvm_s390_dbf);
481         return rc;
482 }
483
484 void kvm_arch_exit(void)
485 {
486         kvm_s390_gib_destroy();
487         debug_unregister(kvm_s390_dbf);
488 }
489
490 /* Section: device related */
491 long kvm_arch_dev_ioctl(struct file *filp,
492                         unsigned int ioctl, unsigned long arg)
493 {
494         if (ioctl == KVM_S390_ENABLE_SIE)
495                 return s390_enable_sie();
496         return -EINVAL;
497 }
498
499 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
500 {
501         int r;
502
503         switch (ext) {
504         case KVM_CAP_S390_PSW:
505         case KVM_CAP_S390_GMAP:
506         case KVM_CAP_SYNC_MMU:
507 #ifdef CONFIG_KVM_S390_UCONTROL
508         case KVM_CAP_S390_UCONTROL:
509 #endif
510         case KVM_CAP_ASYNC_PF:
511         case KVM_CAP_SYNC_REGS:
512         case KVM_CAP_ONE_REG:
513         case KVM_CAP_ENABLE_CAP:
514         case KVM_CAP_S390_CSS_SUPPORT:
515         case KVM_CAP_IOEVENTFD:
516         case KVM_CAP_DEVICE_CTRL:
517         case KVM_CAP_S390_IRQCHIP:
518         case KVM_CAP_VM_ATTRIBUTES:
519         case KVM_CAP_MP_STATE:
520         case KVM_CAP_IMMEDIATE_EXIT:
521         case KVM_CAP_S390_INJECT_IRQ:
522         case KVM_CAP_S390_USER_SIGP:
523         case KVM_CAP_S390_USER_STSI:
524         case KVM_CAP_S390_SKEYS:
525         case KVM_CAP_S390_IRQ_STATE:
526         case KVM_CAP_S390_USER_INSTR0:
527         case KVM_CAP_S390_CMMA_MIGRATION:
528         case KVM_CAP_S390_AIS:
529         case KVM_CAP_S390_AIS_MIGRATION:
530                 r = 1;
531                 break;
532         case KVM_CAP_S390_HPAGE_1M:
533                 r = 0;
534                 if (hpage && !kvm_is_ucontrol(kvm))
535                         r = 1;
536                 break;
537         case KVM_CAP_S390_MEM_OP:
538                 r = MEM_OP_MAX_SIZE;
539                 break;
540         case KVM_CAP_NR_VCPUS:
541         case KVM_CAP_MAX_VCPUS:
542         case KVM_CAP_MAX_VCPU_ID:
543                 r = KVM_S390_BSCA_CPU_SLOTS;
544                 if (!kvm_s390_use_sca_entries())
545                         r = KVM_MAX_VCPUS;
546                 else if (sclp.has_esca && sclp.has_64bscao)
547                         r = KVM_S390_ESCA_CPU_SLOTS;
548                 break;
549         case KVM_CAP_S390_COW:
550                 r = MACHINE_HAS_ESOP;
551                 break;
552         case KVM_CAP_S390_VECTOR_REGISTERS:
553                 r = MACHINE_HAS_VX;
554                 break;
555         case KVM_CAP_S390_RI:
556                 r = test_facility(64);
557                 break;
558         case KVM_CAP_S390_GS:
559                 r = test_facility(133);
560                 break;
561         case KVM_CAP_S390_BPB:
562                 r = test_facility(82);
563                 break;
564         default:
565                 r = 0;
566         }
567         return r;
568 }
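/*
 * Editorial sketch (not taken from this file): user space reaches this
 * handler with the generic KVM_CHECK_EXTENSION ioctl, here assumed to be
 * issued on a VM file descriptor obtained via KVM_CREATE_VM:
 *
 *	#include <linux/kvm.h>
 *	#include <sys/ioctl.h>
 *
 *	int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	// 0 if unsupported, otherwise MEM_OP_MAX_SIZE (65536) per the code above
 *
 * Capabilities that only report presence (e.g. KVM_CAP_S390_USER_SIGP)
 * return 0 or 1 instead of a size.
 */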
569
570 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
571                                     struct kvm_memory_slot *memslot)
572 {
573         int i;
574         gfn_t cur_gfn, last_gfn;
575         unsigned long gaddr, vmaddr;
576         struct gmap *gmap = kvm->arch.gmap;
577         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
578
579         /* Loop over all guest segments */
580         cur_gfn = memslot->base_gfn;
581         last_gfn = memslot->base_gfn + memslot->npages;
582         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
583                 gaddr = gfn_to_gpa(cur_gfn);
584                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
585                 if (kvm_is_error_hva(vmaddr))
586                         continue;
587
588                 bitmap_zero(bitmap, _PAGE_ENTRIES);
589                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
590                 for (i = 0; i < _PAGE_ENTRIES; i++) {
591                         if (test_bit(i, bitmap))
592                                 mark_page_dirty(kvm, cur_gfn + i);
593                 }
594
595                 if (fatal_signal_pending(current))
596                         return;
597                 cond_resched();
598         }
599 }
600
601 /* Section: vm related */
602 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
603
604 /*
605  * Get (and clear) the dirty memory log for a memory slot.
606  */
607 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
608                                struct kvm_dirty_log *log)
609 {
610         int r;
611         unsigned long n;
612         struct kvm_memslots *slots;
613         struct kvm_memory_slot *memslot;
614         int is_dirty = 0;
615
616         if (kvm_is_ucontrol(kvm))
617                 return -EINVAL;
618
619         mutex_lock(&kvm->slots_lock);
620
621         r = -EINVAL;
622         if (log->slot >= KVM_USER_MEM_SLOTS)
623                 goto out;
624
625         slots = kvm_memslots(kvm);
626         memslot = id_to_memslot(slots, log->slot);
627         r = -ENOENT;
628         if (!memslot->dirty_bitmap)
629                 goto out;
630
631         kvm_s390_sync_dirty_log(kvm, memslot);
632         r = kvm_get_dirty_log(kvm, log, &is_dirty);
633         if (r)
634                 goto out;
635
636         /* Clear the dirty log */
637         if (is_dirty) {
638                 n = kvm_dirty_bitmap_bytes(memslot);
639                 memset(memslot->dirty_bitmap, 0, n);
640         }
641         r = 0;
642 out:
643         mutex_unlock(&kvm->slots_lock);
644         return r;
645 }
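/*
 * Editorial sketch (not taken from this file): user space retrieves and
 * clears the log with the generic KVM_GET_DIRTY_LOG ioctl on the VM fd.
 * The bitmap buffer must hold one bit per page of the memory slot:
 *
 *	struct kvm_dirty_log log = {
 *		.slot = slot_id,		// assumed memslot id
 *		.dirty_bitmap = bitmap,		// user buffer, npages bits
 *	};
 *	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 *
 * On s390 the handler above first syncs the dirty state out of the gmap
 * (see kvm_s390_sync_dirty_log()) before copying and clearing the bitmap.
 */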
646
647 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
648 {
649         unsigned int i;
650         struct kvm_vcpu *vcpu;
651
652         kvm_for_each_vcpu(i, vcpu, kvm) {
653                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
654         }
655 }
656
657 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
658 {
659         int r;
660
661         if (cap->flags)
662                 return -EINVAL;
663
664         switch (cap->cap) {
665         case KVM_CAP_S390_IRQCHIP:
666                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
667                 kvm->arch.use_irqchip = 1;
668                 r = 0;
669                 break;
670         case KVM_CAP_S390_USER_SIGP:
671                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
672                 kvm->arch.user_sigp = 1;
673                 r = 0;
674                 break;
675         case KVM_CAP_S390_VECTOR_REGISTERS:
676                 mutex_lock(&kvm->lock);
677                 if (kvm->created_vcpus) {
678                         r = -EBUSY;
679                 } else if (MACHINE_HAS_VX) {
680                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
681                         set_kvm_facility(kvm->arch.model.fac_list, 129);
682                         if (test_facility(134)) {
683                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
684                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
685                         }
686                         if (test_facility(135)) {
687                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
688                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
689                         }
690                         if (test_facility(148)) {
691                                 set_kvm_facility(kvm->arch.model.fac_mask, 148);
692                                 set_kvm_facility(kvm->arch.model.fac_list, 148);
693                         }
694                         if (test_facility(152)) {
695                                 set_kvm_facility(kvm->arch.model.fac_mask, 152);
696                                 set_kvm_facility(kvm->arch.model.fac_list, 152);
697                         }
698                         r = 0;
699                 } else
700                         r = -EINVAL;
701                 mutex_unlock(&kvm->lock);
702                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
703                          r ? "(not available)" : "(success)");
704                 break;
705         case KVM_CAP_S390_RI:
706                 r = -EINVAL;
707                 mutex_lock(&kvm->lock);
708                 if (kvm->created_vcpus) {
709                         r = -EBUSY;
710                 } else if (test_facility(64)) {
711                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
712                         set_kvm_facility(kvm->arch.model.fac_list, 64);
713                         r = 0;
714                 }
715                 mutex_unlock(&kvm->lock);
716                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
717                          r ? "(not available)" : "(success)");
718                 break;
719         case KVM_CAP_S390_AIS:
720                 mutex_lock(&kvm->lock);
721                 if (kvm->created_vcpus) {
722                         r = -EBUSY;
723                 } else {
724                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
725                         set_kvm_facility(kvm->arch.model.fac_list, 72);
726                         r = 0;
727                 }
728                 mutex_unlock(&kvm->lock);
729                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
730                          r ? "(not available)" : "(success)");
731                 break;
732         case KVM_CAP_S390_GS:
733                 r = -EINVAL;
734                 mutex_lock(&kvm->lock);
735                 if (kvm->created_vcpus) {
736                         r = -EBUSY;
737                 } else if (test_facility(133)) {
738                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
739                         set_kvm_facility(kvm->arch.model.fac_list, 133);
740                         r = 0;
741                 }
742                 mutex_unlock(&kvm->lock);
743                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
744                          r ? "(not available)" : "(success)");
745                 break;
746         case KVM_CAP_S390_HPAGE_1M:
747                 mutex_lock(&kvm->lock);
748                 if (kvm->created_vcpus)
749                         r = -EBUSY;
750                 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
751                         r = -EINVAL;
752                 else {
753                         r = 0;
754                         down_write(&kvm->mm->mmap_sem);
755                         kvm->mm->context.allow_gmap_hpage_1m = 1;
756                         up_write(&kvm->mm->mmap_sem);
757                         /*
758                          * We might have to create fake 4k page
759                          * tables. To prevent the hardware from working on
760                          * stale PGSTEs, we emulate these instructions.
761                          */
762                         kvm->arch.use_skf = 0;
763                         kvm->arch.use_pfmfi = 0;
764                 }
765                 mutex_unlock(&kvm->lock);
766                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
767                          r ? "(not available)" : "(success)");
768                 break;
769         case KVM_CAP_S390_USER_STSI:
770                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
771                 kvm->arch.user_stsi = 1;
772                 r = 0;
773                 break;
774         case KVM_CAP_S390_USER_INSTR0:
775                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
776                 kvm->arch.user_instr0 = 1;
777                 icpt_operexc_on_all_vcpus(kvm);
778                 r = 0;
779                 break;
780         default:
781                 r = -EINVAL;
782                 break;
783         }
784         return r;
785 }
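/*
 * Editorial sketch (not taken from this file): these VM capabilities are
 * switched on from user space with KVM_ENABLE_CAP on the VM fd, e.g. for
 * user controlled SIGP handling:
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_USER_SIGP,	// flags and args stay 0
 *	};
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 *
 * Capabilities that change the CPU model (vector registers, RI, GS, ...)
 * must be enabled before the first VCPU is created, hence the -EBUSY
 * checks against kvm->created_vcpus above.
 */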
786
787 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
788 {
789         int ret;
790
791         switch (attr->attr) {
792         case KVM_S390_VM_MEM_LIMIT_SIZE:
793                 ret = 0;
794                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
795                          kvm->arch.mem_limit);
796                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
797                         ret = -EFAULT;
798                 break;
799         default:
800                 ret = -ENXIO;
801                 break;
802         }
803         return ret;
804 }
805
806 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
807 {
808         int ret;
809         unsigned int idx;
810         switch (attr->attr) {
811         case KVM_S390_VM_MEM_ENABLE_CMMA:
812                 ret = -ENXIO;
813                 if (!sclp.has_cmma)
814                         break;
815
816                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
817                 mutex_lock(&kvm->lock);
818                 if (kvm->created_vcpus)
819                         ret = -EBUSY;
820                 else if (kvm->mm->context.allow_gmap_hpage_1m)
821                         ret = -EINVAL;
822                 else {
823                         kvm->arch.use_cmma = 1;
824                         /* Not compatible with cmma. */
825                         kvm->arch.use_pfmfi = 0;
826                         ret = 0;
827                 }
828                 mutex_unlock(&kvm->lock);
829                 break;
830         case KVM_S390_VM_MEM_CLR_CMMA:
831                 ret = -ENXIO;
832                 if (!sclp.has_cmma)
833                         break;
834                 ret = -EINVAL;
835                 if (!kvm->arch.use_cmma)
836                         break;
837
838                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
839                 mutex_lock(&kvm->lock);
840                 idx = srcu_read_lock(&kvm->srcu);
841                 s390_reset_cmma(kvm->arch.gmap->mm);
842                 srcu_read_unlock(&kvm->srcu, idx);
843                 mutex_unlock(&kvm->lock);
844                 ret = 0;
845                 break;
846         case KVM_S390_VM_MEM_LIMIT_SIZE: {
847                 unsigned long new_limit;
848
849                 if (kvm_is_ucontrol(kvm))
850                         return -EINVAL;
851
852                 if (get_user(new_limit, (u64 __user *)attr->addr))
853                         return -EFAULT;
854
855                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
856                     new_limit > kvm->arch.mem_limit)
857                         return -E2BIG;
858
859                 if (!new_limit)
860                         return -EINVAL;
861
862                 /* gmap_create takes last usable address */
863                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
864                         new_limit -= 1;
865
866                 ret = -EBUSY;
867                 mutex_lock(&kvm->lock);
868                 if (!kvm->created_vcpus) {
869                         /* gmap_create will round the limit up */
870                         struct gmap *new = gmap_create(current->mm, new_limit);
871
872                         if (!new) {
873                                 ret = -ENOMEM;
874                         } else {
875                                 gmap_remove(kvm->arch.gmap);
876                                 new->private = kvm;
877                                 kvm->arch.gmap = new;
878                                 ret = 0;
879                         }
880                 }
881                 mutex_unlock(&kvm->lock);
882                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
883                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
884                          (void *) kvm->arch.gmap->asce);
885                 break;
886         }
887         default:
888                 ret = -ENXIO;
889                 break;
890         }
891         return ret;
892 }
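/*
 * Editorial sketch (not taken from this file): the memory attributes above
 * are driven through the VM attribute interface (KVM_CAP_VM_ATTRIBUTES),
 * e.g. to cap the guest address space before any VCPU exists.  The 1 TB
 * value is only an assumed example:
 *
 *	__u64 limit = 1ULL << 40;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64)(unsigned long)&limit,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * KVM_S390_VM_MEM_ENABLE_CMMA and KVM_S390_VM_MEM_CLR_CMMA take no payload;
 * only group and attr are evaluated for them.
 */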
893
894 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
895
896 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
897 {
898         struct kvm_vcpu *vcpu;
899         int i;
900
901         kvm_s390_vcpu_block_all(kvm);
902
903         kvm_for_each_vcpu(i, vcpu, kvm) {
904                 kvm_s390_vcpu_crypto_setup(vcpu);
905                 /* recreate the shadow crycb by leaving the VSIE handler */
906                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
907         }
908
909         kvm_s390_vcpu_unblock_all(kvm);
910 }
911
912 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
913 {
914         mutex_lock(&kvm->lock);
915         switch (attr->attr) {
916         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
917                 if (!test_kvm_facility(kvm, 76)) {
918                         mutex_unlock(&kvm->lock);
919                         return -EINVAL;
920                 }
921                 get_random_bytes(
922                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
923                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
924                 kvm->arch.crypto.aes_kw = 1;
925                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
926                 break;
927         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
928                 if (!test_kvm_facility(kvm, 76)) {
929                         mutex_unlock(&kvm->lock);
930                         return -EINVAL;
931                 }
932                 get_random_bytes(
933                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
934                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
935                 kvm->arch.crypto.dea_kw = 1;
936                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
937                 break;
938         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
939                 if (!test_kvm_facility(kvm, 76)) {
940                         mutex_unlock(&kvm->lock);
941                         return -EINVAL;
942                 }
943                 kvm->arch.crypto.aes_kw = 0;
944                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
945                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
946                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
947                 break;
948         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
949                 if (!test_kvm_facility(kvm, 76)) {
950                         mutex_unlock(&kvm->lock);
951                         return -EINVAL;
952                 }
953                 kvm->arch.crypto.dea_kw = 0;
954                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
955                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
956                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
957                 break;
958         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
959                 if (!ap_instructions_available()) {
960                         mutex_unlock(&kvm->lock);
961                         return -EOPNOTSUPP;
962                 }
963                 kvm->arch.crypto.apie = 1;
964                 break;
965         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
966                 if (!ap_instructions_available()) {
967                         mutex_unlock(&kvm->lock);
968                         return -EOPNOTSUPP;
969                 }
970                 kvm->arch.crypto.apie = 0;
971                 break;
972         default:
973                 mutex_unlock(&kvm->lock);
974                 return -ENXIO;
975         }
976
977         kvm_s390_vcpu_crypto_reset_all(kvm);
978         mutex_unlock(&kvm->lock);
979         return 0;
980 }
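/*
 * Editorial sketch (not taken from this file): the crypto controls are
 * plain VM attributes without a payload, e.g. to have KVM generate a fresh
 * AES wrapping key mask for the guest:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * All VCPUs are blocked and their shadow crycb is rebuilt afterwards via
 * kvm_s390_vcpu_crypto_reset_all(), so the change takes effect immediately.
 */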
981
982 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
983 {
984         int cx;
985         struct kvm_vcpu *vcpu;
986
987         kvm_for_each_vcpu(cx, vcpu, kvm)
988                 kvm_s390_sync_request(req, vcpu);
989 }
990
991 /*
992  * Must be called with kvm->srcu held to avoid races on memslots, and with
993  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
994  */
995 static int kvm_s390_vm_start_migration(struct kvm *kvm)
996 {
997         struct kvm_memory_slot *ms;
998         struct kvm_memslots *slots;
999         unsigned long ram_pages = 0;
1000         int slotnr;
1001
1002         /* migration mode already enabled */
1003         if (kvm->arch.migration_mode)
1004                 return 0;
1005         slots = kvm_memslots(kvm);
1006         if (!slots || !slots->used_slots)
1007                 return -EINVAL;
1008
1009         if (!kvm->arch.use_cmma) {
1010                 kvm->arch.migration_mode = 1;
1011                 return 0;
1012         }
1013         /* mark all the pages in active slots as dirty */
1014         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1015                 ms = slots->memslots + slotnr;
1016                 /*
1017                  * The second half of the bitmap is only used on x86,
1018                  * and would be wasted otherwise, so we put it to good
1019                  * use here to keep track of the state of the storage
1020                  * attributes.
1021                  */
1022                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1023                 ram_pages += ms->npages;
1024         }
1025         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1026         kvm->arch.migration_mode = 1;
1027         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1028         return 0;
1029 }
1030
1031 /*
1032  * Must be called with kvm->slots_lock to avoid races with ourselves and
1033  * kvm_s390_vm_start_migration.
1034  */
1035 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1036 {
1037         /* migration mode already disabled */
1038         if (!kvm->arch.migration_mode)
1039                 return 0;
1040         kvm->arch.migration_mode = 0;
1041         if (kvm->arch.use_cmma)
1042                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1043         return 0;
1044 }
1045
1046 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1047                                      struct kvm_device_attr *attr)
1048 {
1049         int res = -ENXIO;
1050
1051         mutex_lock(&kvm->slots_lock);
1052         switch (attr->attr) {
1053         case KVM_S390_VM_MIGRATION_START:
1054                 res = kvm_s390_vm_start_migration(kvm);
1055                 break;
1056         case KVM_S390_VM_MIGRATION_STOP:
1057                 res = kvm_s390_vm_stop_migration(kvm);
1058                 break;
1059         default:
1060                 break;
1061         }
1062         mutex_unlock(&kvm->slots_lock);
1063
1064         return res;
1065 }
1066
1067 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1068                                      struct kvm_device_attr *attr)
1069 {
1070         u64 mig = kvm->arch.migration_mode;
1071
1072         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1073                 return -ENXIO;
1074
1075         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1076                 return -EFAULT;
1077         return 0;
1078 }
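/*
 * Editorial sketch (not taken from this file): migration mode is toggled
 * and queried through the same VM attribute interface:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,	// or _STOP
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 *	__u64 status;
 *	attr.attr = KVM_S390_VM_MIGRATION_STATUS;
 *	attr.addr = (__u64)(unsigned long)&status;
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);	// status is 0 or 1
 */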
1079
1080 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1081 {
1082         struct kvm_s390_vm_tod_clock gtod;
1083
1084         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1085                 return -EFAULT;
1086
1087         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1088                 return -EINVAL;
1089         kvm_s390_set_tod_clock(kvm, &gtod);
1090
1091         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1092                 gtod.epoch_idx, gtod.tod);
1093
1094         return 0;
1095 }
1096
1097 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1098 {
1099         u8 gtod_high;
1100
1101         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1102                                            sizeof(gtod_high)))
1103                 return -EFAULT;
1104
1105         if (gtod_high != 0)
1106                 return -EINVAL;
1107         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1108
1109         return 0;
1110 }
1111
1112 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1113 {
1114         struct kvm_s390_vm_tod_clock gtod = { 0 };
1115
1116         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1117                            sizeof(gtod.tod)))
1118                 return -EFAULT;
1119
1120         kvm_s390_set_tod_clock(kvm, &gtod);
1121         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1122         return 0;
1123 }
1124
1125 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1126 {
1127         int ret;
1128
1129         if (attr->flags)
1130                 return -EINVAL;
1131
1132         switch (attr->attr) {
1133         case KVM_S390_VM_TOD_EXT:
1134                 ret = kvm_s390_set_tod_ext(kvm, attr);
1135                 break;
1136         case KVM_S390_VM_TOD_HIGH:
1137                 ret = kvm_s390_set_tod_high(kvm, attr);
1138                 break;
1139         case KVM_S390_VM_TOD_LOW:
1140                 ret = kvm_s390_set_tod_low(kvm, attr);
1141                 break;
1142         default:
1143                 ret = -ENXIO;
1144                 break;
1145         }
1146         return ret;
1147 }
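/*
 * Editorial sketch (not taken from this file): the guest TOD is set from
 * user space via the KVM_S390_VM_TOD attribute group.  With the
 * multiple-epoch facility (139) the extended format also carries the 8-bit
 * epoch index; the TOD value below is an assumed example:
 *
 *	struct kvm_s390_vm_tod_clock gtod = {
 *		.epoch_idx = 0,
 *		.tod       = guest_tod,
 *	};
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_EXT,
 *		.addr  = (__u64)(unsigned long)&gtod,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * KVM_S390_VM_TOD_LOW and KVM_S390_VM_TOD_HIGH remain for older user space
 * that only handles the 64-bit TOD.
 */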
1148
1149 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1150                                    struct kvm_s390_vm_tod_clock *gtod)
1151 {
1152         struct kvm_s390_tod_clock_ext htod;
1153
1154         preempt_disable();
1155
1156         get_tod_clock_ext((char *)&htod);
1157
1158         gtod->tod = htod.tod + kvm->arch.epoch;
1159         gtod->epoch_idx = 0;
1160         if (test_kvm_facility(kvm, 139)) {
1161                 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1162                 if (gtod->tod < htod.tod)
1163                         gtod->epoch_idx += 1;
1164         }
1165
1166         preempt_enable();
1167 }
1168
1169 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1170 {
1171         struct kvm_s390_vm_tod_clock gtod;
1172
1173         memset(&gtod, 0, sizeof(gtod));
1174         kvm_s390_get_tod_clock(kvm, &gtod);
1175         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1176                 return -EFAULT;
1177
1178         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1179                 gtod.epoch_idx, gtod.tod);
1180         return 0;
1181 }
1182
1183 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1184 {
1185         u8 gtod_high = 0;
1186
1187         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1188                                          sizeof(gtod_high)))
1189                 return -EFAULT;
1190         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1191
1192         return 0;
1193 }
1194
1195 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1196 {
1197         u64 gtod;
1198
1199         gtod = kvm_s390_get_tod_clock_fast(kvm);
1200         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1201                 return -EFAULT;
1202         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1203
1204         return 0;
1205 }
1206
1207 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1208 {
1209         int ret;
1210
1211         if (attr->flags)
1212                 return -EINVAL;
1213
1214         switch (attr->attr) {
1215         case KVM_S390_VM_TOD_EXT:
1216                 ret = kvm_s390_get_tod_ext(kvm, attr);
1217                 break;
1218         case KVM_S390_VM_TOD_HIGH:
1219                 ret = kvm_s390_get_tod_high(kvm, attr);
1220                 break;
1221         case KVM_S390_VM_TOD_LOW:
1222                 ret = kvm_s390_get_tod_low(kvm, attr);
1223                 break;
1224         default:
1225                 ret = -ENXIO;
1226                 break;
1227         }
1228         return ret;
1229 }
1230
1231 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1232 {
1233         struct kvm_s390_vm_cpu_processor *proc;
1234         u16 lowest_ibc, unblocked_ibc;
1235         int ret = 0;
1236
1237         mutex_lock(&kvm->lock);
1238         if (kvm->created_vcpus) {
1239                 ret = -EBUSY;
1240                 goto out;
1241         }
1242         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1243         if (!proc) {
1244                 ret = -ENOMEM;
1245                 goto out;
1246         }
1247         if (!copy_from_user(proc, (void __user *)attr->addr,
1248                             sizeof(*proc))) {
1249                 kvm->arch.model.cpuid = proc->cpuid;
1250                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1251                 unblocked_ibc = sclp.ibc & 0xfff;
1252                 if (lowest_ibc && proc->ibc) {
1253                         if (proc->ibc > unblocked_ibc)
1254                                 kvm->arch.model.ibc = unblocked_ibc;
1255                         else if (proc->ibc < lowest_ibc)
1256                                 kvm->arch.model.ibc = lowest_ibc;
1257                         else
1258                                 kvm->arch.model.ibc = proc->ibc;
1259                 }
1260                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1261                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1262                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1263                          kvm->arch.model.ibc,
1264                          kvm->arch.model.cpuid);
1265                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1266                          kvm->arch.model.fac_list[0],
1267                          kvm->arch.model.fac_list[1],
1268                          kvm->arch.model.fac_list[2]);
1269         } else
1270                 ret = -EFAULT;
1271         kfree(proc);
1272 out:
1273         mutex_unlock(&kvm->lock);
1274         return ret;
1275 }
1276
1277 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1278                                        struct kvm_device_attr *attr)
1279 {
1280         struct kvm_s390_vm_cpu_feat data;
1281
1282         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1283                 return -EFAULT;
1284         if (!bitmap_subset((unsigned long *) data.feat,
1285                            kvm_s390_available_cpu_feat,
1286                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1287                 return -EINVAL;
1288
1289         mutex_lock(&kvm->lock);
1290         if (kvm->created_vcpus) {
1291                 mutex_unlock(&kvm->lock);
1292                 return -EBUSY;
1293         }
1294         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1295                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1296         mutex_unlock(&kvm->lock);
1297         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1298                          data.feat[0],
1299                          data.feat[1],
1300                          data.feat[2]);
1301         return 0;
1302 }
1303
1304 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1305                                           struct kvm_device_attr *attr)
1306 {
1307         mutex_lock(&kvm->lock);
1308         if (kvm->created_vcpus) {
1309                 mutex_unlock(&kvm->lock);
1310                 return -EBUSY;
1311         }
1312
1313         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1314                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1315                 mutex_unlock(&kvm->lock);
1316                 return -EFAULT;
1317         }
1318         mutex_unlock(&kvm->lock);
1319
1320         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1321                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1322                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1323                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1324                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1325         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1326                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1327                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1328         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1329                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1330                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1331         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1332                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1333                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1334         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1335                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1336                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1337         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1338                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1339                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1340         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1341                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1342                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1343         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1344                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1345                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1346         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1347                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1348                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1349         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1350                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1351                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1352         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1353                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1354                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1355         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1356                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1357                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1358         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1359                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1360                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1361         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1362                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1363                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1364         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1365                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1366                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1367         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1368                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1369                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1370                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1371                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1372         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1373                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1374                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1375                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1376                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1377
1378         return 0;
1379 }
1380
1381 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1382 {
1383         int ret = -ENXIO;
1384
1385         switch (attr->attr) {
1386         case KVM_S390_VM_CPU_PROCESSOR:
1387                 ret = kvm_s390_set_processor(kvm, attr);
1388                 break;
1389         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1390                 ret = kvm_s390_set_processor_feat(kvm, attr);
1391                 break;
1392         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1393                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1394                 break;
1395         }
1396         return ret;
1397 }
1398
1399 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1400 {
1401         struct kvm_s390_vm_cpu_processor *proc;
1402         int ret = 0;
1403
1404         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1405         if (!proc) {
1406                 ret = -ENOMEM;
1407                 goto out;
1408         }
1409         proc->cpuid = kvm->arch.model.cpuid;
1410         proc->ibc = kvm->arch.model.ibc;
1411         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1412                S390_ARCH_FAC_LIST_SIZE_BYTE);
1413         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1414                  kvm->arch.model.ibc,
1415                  kvm->arch.model.cpuid);
1416         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1417                  kvm->arch.model.fac_list[0],
1418                  kvm->arch.model.fac_list[1],
1419                  kvm->arch.model.fac_list[2]);
1420         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1421                 ret = -EFAULT;
1422         kfree(proc);
1423 out:
1424         return ret;
1425 }
1426
1427 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1428 {
1429         struct kvm_s390_vm_cpu_machine *mach;
1430         int ret = 0;
1431
1432         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1433         if (!mach) {
1434                 ret = -ENOMEM;
1435                 goto out;
1436         }
1437         get_cpu_id((struct cpuid *) &mach->cpuid);
1438         mach->ibc = sclp.ibc;
1439         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1440                S390_ARCH_FAC_LIST_SIZE_BYTE);
1441         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1442                sizeof(S390_lowcore.stfle_fac_list));
1443         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1444                  kvm->arch.model.ibc,
1445                  kvm->arch.model.cpuid);
1446         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1447                  mach->fac_mask[0],
1448                  mach->fac_mask[1],
1449                  mach->fac_mask[2]);
1450         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1451                  mach->fac_list[0],
1452                  mach->fac_list[1],
1453                  mach->fac_list[2]);
1454         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1455                 ret = -EFAULT;
1456         kfree(mach);
1457 out:
1458         return ret;
1459 }
1460
1461 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1462                                        struct kvm_device_attr *attr)
1463 {
1464         struct kvm_s390_vm_cpu_feat data;
1465
1466         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1467                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1468         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1469                 return -EFAULT;
1470         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1471                          data.feat[0],
1472                          data.feat[1],
1473                          data.feat[2]);
1474         return 0;
1475 }
1476
1477 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1478                                      struct kvm_device_attr *attr)
1479 {
1480         struct kvm_s390_vm_cpu_feat data;
1481
1482         bitmap_copy((unsigned long *) data.feat,
1483                     kvm_s390_available_cpu_feat,
1484                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1485         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1486                 return -EFAULT;
1487         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1488                          data.feat[0],
1489                          data.feat[1],
1490                          data.feat[2]);
1491         return 0;
1492 }
1493
1494 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1495                                           struct kvm_device_attr *attr)
1496 {
1497         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1498             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1499                 return -EFAULT;
1500
1501         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1502                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1503                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1504                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1505                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1506         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1507                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1508                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1509         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1510                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1511                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1512         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1513                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1514                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1515         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1516                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1517                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1518         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1519                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1520                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1521         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1522                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1523                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1524         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1525                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1526                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1527         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1528                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1529                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1530         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1531                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1532                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1533         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1534                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1535                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1536         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1537                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1538                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1539         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1540                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1541                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1542         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1543                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1544                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1545         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1546                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1547                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1548         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1549                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1550                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1551                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1552                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1553         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1554                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1555                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1556                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1557                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1558
1559         return 0;
1560 }
1561
1562 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1563                                         struct kvm_device_attr *attr)
1564 {
1565         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1566             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1567                 return -EFAULT;
1568
1569         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1570                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1571                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1572                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1573                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1574         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1575                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1576                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1577         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1578                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1579                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1580         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1581                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1582                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1583         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1584                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1585                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1586         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1587                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1588                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1589         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1590                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1591                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1592         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1593                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1594                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1595         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1596                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1597                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1598         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1599                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1600                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1601         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1602                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1603                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1604         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1605                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1606                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1607         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1608                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1609                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1610         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1611                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1612                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1613         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1614                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1615                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1616         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1617                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1618                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1619                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1620                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1621         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1622                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1623                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1624                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1625                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1626
1627         return 0;
1628 }
1629
1630 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1631 {
1632         int ret = -ENXIO;
1633
1634         switch (attr->attr) {
1635         case KVM_S390_VM_CPU_PROCESSOR:
1636                 ret = kvm_s390_get_processor(kvm, attr);
1637                 break;
1638         case KVM_S390_VM_CPU_MACHINE:
1639                 ret = kvm_s390_get_machine(kvm, attr);
1640                 break;
1641         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1642                 ret = kvm_s390_get_processor_feat(kvm, attr);
1643                 break;
1644         case KVM_S390_VM_CPU_MACHINE_FEAT:
1645                 ret = kvm_s390_get_machine_feat(kvm, attr);
1646                 break;
1647         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1648                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1649                 break;
1650         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1651                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1652                 break;
1653         }
1654         return ret;
1655 }
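
/*
 * Illustrative sketch (editorial addition, not part of this file): the
 * attribute getters above are reached from userspace through the
 * KVM_GET_DEVICE_ATTR ioctl on the VM file descriptor.  Reading the host
 * machine model could look roughly like this, assuming the uapi definitions
 * from <linux/kvm.h> and <asm/kvm.h> and with error handling omitted:
 *
 *	struct kvm_s390_vm_cpu_machine mach;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_MACHINE,
 *		.addr  = (__u64)(unsigned long)&mach,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr) == 0)
 *		printf("cpuid 0x%016llx ibc 0x%4.4x\n",
 *		       (unsigned long long)mach.cpuid, mach.ibc);
 */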
1656
1657 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1658 {
1659         int ret;
1660
1661         switch (attr->group) {
1662         case KVM_S390_VM_MEM_CTRL:
1663                 ret = kvm_s390_set_mem_control(kvm, attr);
1664                 break;
1665         case KVM_S390_VM_TOD:
1666                 ret = kvm_s390_set_tod(kvm, attr);
1667                 break;
1668         case KVM_S390_VM_CPU_MODEL:
1669                 ret = kvm_s390_set_cpu_model(kvm, attr);
1670                 break;
1671         case KVM_S390_VM_CRYPTO:
1672                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1673                 break;
1674         case KVM_S390_VM_MIGRATION:
1675                 ret = kvm_s390_vm_set_migration(kvm, attr);
1676                 break;
1677         default:
1678                 ret = -ENXIO;
1679                 break;
1680         }
1681
1682         return ret;
1683 }
1684
1685 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1686 {
1687         int ret;
1688
1689         switch (attr->group) {
1690         case KVM_S390_VM_MEM_CTRL:
1691                 ret = kvm_s390_get_mem_control(kvm, attr);
1692                 break;
1693         case KVM_S390_VM_TOD:
1694                 ret = kvm_s390_get_tod(kvm, attr);
1695                 break;
1696         case KVM_S390_VM_CPU_MODEL:
1697                 ret = kvm_s390_get_cpu_model(kvm, attr);
1698                 break;
1699         case KVM_S390_VM_MIGRATION:
1700                 ret = kvm_s390_vm_get_migration(kvm, attr);
1701                 break;
1702         default:
1703                 ret = -ENXIO;
1704                 break;
1705         }
1706
1707         return ret;
1708 }
1709
1710 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1711 {
1712         int ret;
1713
1714         switch (attr->group) {
1715         case KVM_S390_VM_MEM_CTRL:
1716                 switch (attr->attr) {
1717                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1718                 case KVM_S390_VM_MEM_CLR_CMMA:
1719                         ret = sclp.has_cmma ? 0 : -ENXIO;
1720                         break;
1721                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1722                         ret = 0;
1723                         break;
1724                 default:
1725                         ret = -ENXIO;
1726                         break;
1727                 }
1728                 break;
1729         case KVM_S390_VM_TOD:
1730                 switch (attr->attr) {
1731                 case KVM_S390_VM_TOD_LOW:
1732                 case KVM_S390_VM_TOD_HIGH:
1733                         ret = 0;
1734                         break;
1735                 default:
1736                         ret = -ENXIO;
1737                         break;
1738                 }
1739                 break;
1740         case KVM_S390_VM_CPU_MODEL:
1741                 switch (attr->attr) {
1742                 case KVM_S390_VM_CPU_PROCESSOR:
1743                 case KVM_S390_VM_CPU_MACHINE:
1744                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1745                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1746                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1747                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1748                         ret = 0;
1749                         break;
1750                 default:
1751                         ret = -ENXIO;
1752                         break;
1753                 }
1754                 break;
1755         case KVM_S390_VM_CRYPTO:
1756                 switch (attr->attr) {
1757                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1758                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1759                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1760                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1761                         ret = 0;
1762                         break;
1763                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1764                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1765                         ret = ap_instructions_available() ? 0 : -ENXIO;
1766                         break;
1767                 default:
1768                         ret = -ENXIO;
1769                         break;
1770                 }
1771                 break;
1772         case KVM_S390_VM_MIGRATION:
1773                 ret = 0;
1774                 break;
1775         default:
1776                 ret = -ENXIO;
1777                 break;
1778         }
1779
1780         return ret;
1781 }
1782
1783 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1784 {
1785         uint8_t *keys;
1786         uint64_t hva;
1787         int srcu_idx, i, r = 0;
1788
1789         if (args->flags != 0)
1790                 return -EINVAL;
1791
1792         /* Is this guest using storage keys? */
1793         if (!mm_uses_skeys(current->mm))
1794                 return KVM_S390_GET_SKEYS_NONE;
1795
1796         /* Enforce sane limit on memory allocation */
1797         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1798                 return -EINVAL;
1799
1800         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1801         if (!keys)
1802                 return -ENOMEM;
1803
1804         down_read(&current->mm->mmap_sem);
1805         srcu_idx = srcu_read_lock(&kvm->srcu);
1806         for (i = 0; i < args->count; i++) {
1807                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1808                 if (kvm_is_error_hva(hva)) {
1809                         r = -EFAULT;
1810                         break;
1811                 }
1812
1813                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1814                 if (r)
1815                         break;
1816         }
1817         srcu_read_unlock(&kvm->srcu, srcu_idx);
1818         up_read(&current->mm->mmap_sem);
1819
1820         if (!r) {
1821                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1822                                  sizeof(uint8_t) * args->count);
1823                 if (r)
1824                         r = -EFAULT;
1825         }
1826
1827         kvfree(keys);
1828         return r;
1829 }
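
/*
 * Illustrative sketch (editorial addition): the userspace side of
 * KVM_S390_GET_SKEYS.  struct kvm_s390_skeys comes from <linux/kvm.h>; a
 * return value of KVM_S390_GET_SKEYS_NONE means the guest never enabled
 * storage keys, so there is nothing to save.  skip_storage_keys() is a
 * hypothetical helper.
 *
 *	uint8_t keys[512];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = sizeof(keys),
 *		.skeydata_addr = (__u64)(unsigned long)keys,
 *	};
 *	int r = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *
 *	if (r == KVM_S390_GET_SKEYS_NONE)
 *		skip_storage_keys();
 */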
1830
1831 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1832 {
1833         uint8_t *keys;
1834         uint64_t hva;
1835         int srcu_idx, i, r = 0;
1836         bool unlocked;
1837
1838         if (args->flags != 0)
1839                 return -EINVAL;
1840
1841         /* Enforce sane limit on memory allocation */
1842         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1843                 return -EINVAL;
1844
1845         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1846         if (!keys)
1847                 return -ENOMEM;
1848
1849         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1850                            sizeof(uint8_t) * args->count);
1851         if (r) {
1852                 r = -EFAULT;
1853                 goto out;
1854         }
1855
1856         /* Enable storage key handling for the guest */
1857         r = s390_enable_skey();
1858         if (r)
1859                 goto out;
1860
1861         i = 0;
1862         down_read(&current->mm->mmap_sem);
1863         srcu_idx = srcu_read_lock(&kvm->srcu);
1864         while (i < args->count) {
1865                 unlocked = false;
1866                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1867                 if (kvm_is_error_hva(hva)) {
1868                         r = -EFAULT;
1869                         break;
1870                 }
1871
1872                 /* Lowest order bit is reserved */
1873                 if (keys[i] & 0x01) {
1874                         r = -EINVAL;
1875                         break;
1876                 }
1877
1878                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1879                 if (r) {
1880                         r = fixup_user_fault(current, current->mm, hva,
1881                                              FAULT_FLAG_WRITE, &unlocked);
1882                         if (r)
1883                                 break;
1884                 }
1885                 if (!r)
1886                         i++;
1887         }
1888         srcu_read_unlock(&kvm->srcu, srcu_idx);
1889         up_read(&current->mm->mmap_sem);
1890 out:
1891         kvfree(keys);
1892         return r;
1893 }
1894
1895 /*
1896  * Base address and length must be sent at the start of each block; therefore
1897  * it's cheaper to send some clean data, as long as it's less than the size of
1898  * two longs.
1899  */
1900 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1901 /* for consistency */
1902 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
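
/*
 * Editorial illustration of the trade-off above: starting a new block costs
 * roughly two longs of metadata (base address plus length, 16 bytes on
 * 64-bit), while each clean page carried inside the current block costs a
 * single value byte.  A run of up to KVM_S390_MAX_BIT_DISTANCE clean pages
 * is therefore no more expensive to send inline than to close the block and
 * open a new one after the gap.
 */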
1903
1904 /*
1905  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1906  * address falls in a hole. In that case the index of one of the memslots
1907  * bordering the hole is returned.
1908  */
1909 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1910 {
1911         int start = 0, end = slots->used_slots;
1912         int slot = atomic_read(&slots->lru_slot);
1913         struct kvm_memory_slot *memslots = slots->memslots;
1914
1915         if (gfn >= memslots[slot].base_gfn &&
1916             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1917                 return slot;
1918
1919         while (start < end) {
1920                 slot = start + (end - start) / 2;
1921
1922                 if (gfn >= memslots[slot].base_gfn)
1923                         end = slot;
1924                 else
1925                         start = slot + 1;
1926         }
1927
1928         if (gfn >= memslots[start].base_gfn &&
1929             gfn < memslots[start].base_gfn + memslots[start].npages) {
1930                 atomic_set(&slots->lru_slot, start);
1931         }
1932
1933         return start;
1934 }
1935
1936 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1937                               u8 *res, unsigned long bufsize)
1938 {
1939         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1940
1941         args->count = 0;
1942         while (args->count < bufsize) {
1943                 hva = gfn_to_hva(kvm, cur_gfn);
1944                 /*
1945                  * We return an error if the first value was invalid, but we
1946                  * return successfully if at least one value was copied.
1947                  */
1948                 if (kvm_is_error_hva(hva))
1949                         return args->count ? 0 : -EFAULT;
1950                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1951                         pgstev = 0;
1952                 res[args->count++] = (pgstev >> 24) & 0x43;
1953                 cur_gfn++;
1954         }
1955
1956         return 0;
1957 }
1958
1959 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1960                                               unsigned long cur_gfn)
1961 {
1962         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1963         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1964         unsigned long ofs = cur_gfn - ms->base_gfn;
1965
1966         if (ms->base_gfn + ms->npages <= cur_gfn) {
1967                 slotidx--;
1968                 /* If we are above the highest slot, wrap around */
1969                 if (slotidx < 0)
1970                         slotidx = slots->used_slots - 1;
1971
1972                 ms = slots->memslots + slotidx;
1973                 ofs = 0;
1974         }
1975         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1976         while ((slotidx > 0) && (ofs >= ms->npages)) {
1977                 slotidx--;
1978                 ms = slots->memslots + slotidx;
1979                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1980         }
1981         return ms->base_gfn + ofs;
1982 }
1983
1984 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1985                              u8 *res, unsigned long bufsize)
1986 {
1987         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1988         struct kvm_memslots *slots = kvm_memslots(kvm);
1989         struct kvm_memory_slot *ms;
1990
1991         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1992         ms = gfn_to_memslot(kvm, cur_gfn);
1993         args->count = 0;
1994         args->start_gfn = cur_gfn;
1995         if (!ms)
1996                 return 0;
1997         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1998         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
1999
2000         while (args->count < bufsize) {
2001                 hva = gfn_to_hva(kvm, cur_gfn);
2002                 if (kvm_is_error_hva(hva))
2003                         return 0;
2004                 /* Decrement only if we actually flipped the bit to 0 */
2005                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2006                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2007                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2008                         pgstev = 0;
2009                 /* Save the value */
2010                 res[args->count++] = (pgstev >> 24) & 0x43;
2011                 /* If the next bit is too far away, stop. */
2012                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2013                         return 0;
2014                 /* If we reached the previous "next", find the next one */
2015                 if (cur_gfn == next_gfn)
2016                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2017                 /* Reached the end of memory or of the buffer, stop */
2018                 if ((next_gfn >= mem_end) ||
2019                     (next_gfn - args->start_gfn >= bufsize))
2020                         return 0;
2021                 cur_gfn++;
2022                 /* Reached the end of the current memslot, take the next one. */
2023                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2024                         ms = gfn_to_memslot(kvm, cur_gfn);
2025                         if (!ms)
2026                                 return 0;
2027                 }
2028         }
2029         return 0;
2030 }
2031
2032 /*
2033  * This function searches for the next page with dirty CMMA attributes, and
2034  * saves the attributes in the buffer up to either the end of the buffer or
2035  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2036  * no trailing clean bytes are saved.
2037  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2038  * output buffer will indicate 0 as length.
2039  */
2040 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2041                                   struct kvm_s390_cmma_log *args)
2042 {
2043         unsigned long bufsize;
2044         int srcu_idx, peek, ret;
2045         u8 *values;
2046
2047         if (!kvm->arch.use_cmma)
2048                 return -ENXIO;
2049         /* Invalid/unsupported flags were specified */
2050         if (args->flags & ~KVM_S390_CMMA_PEEK)
2051                 return -EINVAL;
2052         /* Migration mode query, and we are not doing a migration */
2053         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2054         if (!peek && !kvm->arch.migration_mode)
2055                 return -EINVAL;
2056         /* CMMA is disabled or was not used, or the buffer has length zero */
2057         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2058         if (!bufsize || !kvm->mm->context.uses_cmm) {
2059                 memset(args, 0, sizeof(*args));
2060                 return 0;
2061         }
2062         /* We are not peeking, and there are no dirty pages */
2063         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2064                 memset(args, 0, sizeof(*args));
2065                 return 0;
2066         }
2067
2068         values = vmalloc(bufsize);
2069         if (!values)
2070                 return -ENOMEM;
2071
2072         down_read(&kvm->mm->mmap_sem);
2073         srcu_idx = srcu_read_lock(&kvm->srcu);
2074         if (peek)
2075                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2076         else
2077                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2078         srcu_read_unlock(&kvm->srcu, srcu_idx);
2079         up_read(&kvm->mm->mmap_sem);
2080
2081         if (kvm->arch.migration_mode)
2082                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2083         else
2084                 args->remaining = 0;
2085
2086         if (copy_to_user((void __user *)args->values, values, args->count))
2087                 ret = -EFAULT;
2088
2089         vfree(values);
2090         return ret;
2091 }
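
/*
 * Illustrative sketch (editorial addition): a simplified userspace loop
 * draining the CMMA values during migration.  struct kvm_s390_cmma_log and
 * KVM_S390_CMMA_PEEK come from <linux/kvm.h>; send_block() is a
 * hypothetical helper, and a real implementation would also look at
 * log.remaining to decide when the dirty set has been drained.
 *
 *	uint8_t buf[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = sizeof(buf),
 *		.flags = 0,
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *
 *	while (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) == 0 && log.count) {
 *		send_block(log.start_gfn, log.count, buf);
 *		log.start_gfn += log.count;
 *		log.count = sizeof(buf);
 *	}
 */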
2092
2093 /*
2094  * This function sets the CMMA attributes for the given pages. If the input
2095  * buffer has zero length, no action is taken; otherwise the attributes are
2096  * set and the mm->context.uses_cmm flag is set.
2097  */
2098 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2099                                   const struct kvm_s390_cmma_log *args)
2100 {
2101         unsigned long hva, mask, pgstev, i;
2102         uint8_t *bits;
2103         int srcu_idx, r = 0;
2104
2105         mask = args->mask;
2106
2107         if (!kvm->arch.use_cmma)
2108                 return -ENXIO;
2109         /* invalid/unsupported flags */
2110         if (args->flags != 0)
2111                 return -EINVAL;
2112         /* Enforce sane limit on memory allocation */
2113         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2114                 return -EINVAL;
2115         /* Nothing to do */
2116         if (args->count == 0)
2117                 return 0;
2118
2119         bits = vmalloc(array_size(sizeof(*bits), args->count));
2120         if (!bits)
2121                 return -ENOMEM;
2122
2123         r = copy_from_user(bits, (void __user *)args->values, args->count);
2124         if (r) {
2125                 r = -EFAULT;
2126                 goto out;
2127         }
2128
2129         down_read(&kvm->mm->mmap_sem);
2130         srcu_idx = srcu_read_lock(&kvm->srcu);
2131         for (i = 0; i < args->count; i++) {
2132                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2133                 if (kvm_is_error_hva(hva)) {
2134                         r = -EFAULT;
2135                         break;
2136                 }
2137
2138                 pgstev = bits[i];
2139                 pgstev = pgstev << 24;
2140                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2141                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2142         }
2143         srcu_read_unlock(&kvm->srcu, srcu_idx);
2144         up_read(&kvm->mm->mmap_sem);
2145
2146         if (!kvm->mm->context.uses_cmm) {
2147                 down_write(&kvm->mm->mmap_sem);
2148                 kvm->mm->context.uses_cmm = 1;
2149                 up_write(&kvm->mm->mmap_sem);
2150         }
2151 out:
2152         vfree(bits);
2153         return r;
2154 }
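
/*
 * Illustrative sketch (editorial addition): the matching destination-side
 * call.  Setting mask to all ones asks the kernel to apply every page state
 * bit contained in the values buffer; as shown above, the kernel still
 * limits the mask to the bits it supports.  gfn, len and buf are assumed to
 * come from the sending side.
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = gfn,
 *		.count = len,
 *		.mask = ~0ULL,
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &log);
 */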
2155
2156 long kvm_arch_vm_ioctl(struct file *filp,
2157                        unsigned int ioctl, unsigned long arg)
2158 {
2159         struct kvm *kvm = filp->private_data;
2160         void __user *argp = (void __user *)arg;
2161         struct kvm_device_attr attr;
2162         int r;
2163
2164         switch (ioctl) {
2165         case KVM_S390_INTERRUPT: {
2166                 struct kvm_s390_interrupt s390int;
2167
2168                 r = -EFAULT;
2169                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2170                         break;
2171                 r = kvm_s390_inject_vm(kvm, &s390int);
2172                 break;
2173         }
2174         case KVM_CREATE_IRQCHIP: {
2175                 struct kvm_irq_routing_entry routing;
2176
2177                 r = -EINVAL;
2178                 if (kvm->arch.use_irqchip) {
2179                         /* Set up dummy routing. */
2180                         memset(&routing, 0, sizeof(routing));
2181                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2182                 }
2183                 break;
2184         }
2185         case KVM_SET_DEVICE_ATTR: {
2186                 r = -EFAULT;
2187                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2188                         break;
2189                 r = kvm_s390_vm_set_attr(kvm, &attr);
2190                 break;
2191         }
2192         case KVM_GET_DEVICE_ATTR: {
2193                 r = -EFAULT;
2194                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2195                         break;
2196                 r = kvm_s390_vm_get_attr(kvm, &attr);
2197                 break;
2198         }
2199         case KVM_HAS_DEVICE_ATTR: {
2200                 r = -EFAULT;
2201                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2202                         break;
2203                 r = kvm_s390_vm_has_attr(kvm, &attr);
2204                 break;
2205         }
2206         case KVM_S390_GET_SKEYS: {
2207                 struct kvm_s390_skeys args;
2208
2209                 r = -EFAULT;
2210                 if (copy_from_user(&args, argp,
2211                                    sizeof(struct kvm_s390_skeys)))
2212                         break;
2213                 r = kvm_s390_get_skeys(kvm, &args);
2214                 break;
2215         }
2216         case KVM_S390_SET_SKEYS: {
2217                 struct kvm_s390_skeys args;
2218
2219                 r = -EFAULT;
2220                 if (copy_from_user(&args, argp,
2221                                    sizeof(struct kvm_s390_skeys)))
2222                         break;
2223                 r = kvm_s390_set_skeys(kvm, &args);
2224                 break;
2225         }
2226         case KVM_S390_GET_CMMA_BITS: {
2227                 struct kvm_s390_cmma_log args;
2228
2229                 r = -EFAULT;
2230                 if (copy_from_user(&args, argp, sizeof(args)))
2231                         break;
2232                 mutex_lock(&kvm->slots_lock);
2233                 r = kvm_s390_get_cmma_bits(kvm, &args);
2234                 mutex_unlock(&kvm->slots_lock);
2235                 if (!r) {
2236                         r = copy_to_user(argp, &args, sizeof(args));
2237                         if (r)
2238                                 r = -EFAULT;
2239                 }
2240                 break;
2241         }
2242         case KVM_S390_SET_CMMA_BITS: {
2243                 struct kvm_s390_cmma_log args;
2244
2245                 r = -EFAULT;
2246                 if (copy_from_user(&args, argp, sizeof(args)))
2247                         break;
2248                 mutex_lock(&kvm->slots_lock);
2249                 r = kvm_s390_set_cmma_bits(kvm, &args);
2250                 mutex_unlock(&kvm->slots_lock);
2251                 break;
2252         }
2253         default:
2254                 r = -ENOTTY;
2255         }
2256
2257         return r;
2258 }
2259
2260 static int kvm_s390_apxa_installed(void)
2261 {
2262         struct ap_config_info info;
2263
2264         if (ap_instructions_available()) {
2265                 if (ap_qci(&info) == 0)
2266                         return info.apxa;
2267         }
2268
2269         return 0;
2270 }
2271
2272 /*
2273  * The format of the crypto control block (CRYCB) is specified in the 3 low
2274  * order bits of the CRYCB designation (CRYCBD) field as follows:
2275  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2276  *           AP extended addressing (APXA) facility is installed.
2277  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2278  * Format 2: Both the APXA and MSAX3 facilities are installed.
2279  */
2280 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2281 {
2282         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2283
2284         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2285         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2286
2287         /* Check whether MSAX3 is installed */
2288         if (!test_kvm_facility(kvm, 76))
2289                 return;
2290
2291         if (kvm_s390_apxa_installed())
2292                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2293         else
2294                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2295 }
2296
2297 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2298                                unsigned long *aqm, unsigned long *adm)
2299 {
2300         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2301
2302         mutex_lock(&kvm->lock);
2303         kvm_s390_vcpu_block_all(kvm);
2304
2305         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2306         case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2307                 memcpy(crycb->apcb1.apm, apm, 32);
2308                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2309                          apm[0], apm[1], apm[2], apm[3]);
2310                 memcpy(crycb->apcb1.aqm, aqm, 32);
2311                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2312                          aqm[0], aqm[1], aqm[2], aqm[3]);
2313                 memcpy(crycb->apcb1.adm, adm, 32);
2314                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2315                          adm[0], adm[1], adm[2], adm[3]);
2316                 break;
2317         case CRYCB_FORMAT1:
2318         case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2319                 memcpy(crycb->apcb0.apm, apm, 8);
2320                 memcpy(crycb->apcb0.aqm, aqm, 2);
2321                 memcpy(crycb->apcb0.adm, adm, 2);
2322                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2323                          apm[0], *((unsigned short *)aqm),
2324                          *((unsigned short *)adm));
2325                 break;
2326         default:        /* Cannot happen */
2327                 break;
2328         }
2329
2330         /* recreate the shadow crycb for each vcpu */
2331         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2332         kvm_s390_vcpu_unblock_all(kvm);
2333         mutex_unlock(&kvm->lock);
2334 }
2335 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
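
/*
 * Illustrative sketch (editorial addition): an in-kernel caller (for
 * example a vfio_ap style device driver) grants AP resources by passing
 * three 256-bit masks.  The bit numbering is assumed to follow the MSB-0
 * convention of the APCB, hence the inverted bitmap helpers used elsewhere
 * in this file; the adapter and domain numbers below are made up.
 *
 *	DECLARE_BITMAP(apm, 256) = { 0 };
 *	DECLARE_BITMAP(aqm, 256) = { 0 };
 *	DECLARE_BITMAP(adm, 256) = { 0 };
 *
 *	set_bit_inv(3, apm);	(grant AP adapter 3)
 *	set_bit_inv(0, aqm);	(grant usage domain 0)
 *	set_bit_inv(0, adm);	(grant control domain 0)
 *	kvm_arch_crypto_set_masks(kvm, apm, aqm, adm);
 */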
2336
2337 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2338 {
2339         mutex_lock(&kvm->lock);
2340         kvm_s390_vcpu_block_all(kvm);
2341
2342         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2343                sizeof(kvm->arch.crypto.crycb->apcb0));
2344         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2345                sizeof(kvm->arch.crypto.crycb->apcb1));
2346
2347         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2348         /* recreate the shadow crycb for each vcpu */
2349         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2350         kvm_s390_vcpu_unblock_all(kvm);
2351         mutex_unlock(&kvm->lock);
2352 }
2353 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2354
2355 static u64 kvm_s390_get_initial_cpuid(void)
2356 {
2357         struct cpuid cpuid;
2358
2359         get_cpu_id(&cpuid);
2360         cpuid.version = 0xff;
2361         return *((u64 *) &cpuid);
2362 }
2363
2364 static void kvm_s390_crypto_init(struct kvm *kvm)
2365 {
2366         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2367         kvm_s390_set_crycb_format(kvm);
2368
2369         if (!test_kvm_facility(kvm, 76))
2370                 return;
2371
2372         /* Enable AES/DEA protected key functions by default */
2373         kvm->arch.crypto.aes_kw = 1;
2374         kvm->arch.crypto.dea_kw = 1;
2375         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2376                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2377         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2378                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2379 }
2380
2381 static void sca_dispose(struct kvm *kvm)
2382 {
2383         if (kvm->arch.use_esca)
2384                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2385         else
2386                 free_page((unsigned long)(kvm->arch.sca));
2387         kvm->arch.sca = NULL;
2388 }
2389
2390 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2391 {
2392         gfp_t alloc_flags = GFP_KERNEL;
2393         int i, rc;
2394         char debug_name[16];
2395         static unsigned long sca_offset;
2396
2397         rc = -EINVAL;
2398 #ifdef CONFIG_KVM_S390_UCONTROL
2399         if (type & ~KVM_VM_S390_UCONTROL)
2400                 goto out_err;
2401         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2402                 goto out_err;
2403 #else
2404         if (type)
2405                 goto out_err;
2406 #endif
2407
2408         rc = s390_enable_sie();
2409         if (rc)
2410                 goto out_err;
2411
2412         rc = -ENOMEM;
2413
2414         if (!sclp.has_64bscao)
2415                 alloc_flags |= GFP_DMA;
2416         rwlock_init(&kvm->arch.sca_lock);
2417         /* start with basic SCA */
2418         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2419         if (!kvm->arch.sca)
2420                 goto out_err;
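        /*
         * Editorial note (assumption): the 16-byte staggering below spreads
         * the basic SCAs of different VMs across different cache lines of
         * their pages, instead of letting every VM's hot SCA fields start at
         * the same offset.
         */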
2421         spin_lock(&kvm_lock);
2422         sca_offset += 16;
2423         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2424                 sca_offset = 0;
2425         kvm->arch.sca = (struct bsca_block *)
2426                         ((char *) kvm->arch.sca + sca_offset);
2427         spin_unlock(&kvm_lock);
2428
2429         sprintf(debug_name, "kvm-%u", current->pid);
2430
2431         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2432         if (!kvm->arch.dbf)
2433                 goto out_err;
2434
2435         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2436         kvm->arch.sie_page2 =
2437              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2438         if (!kvm->arch.sie_page2)
2439                 goto out_err;
2440
2441         kvm->arch.sie_page2->kvm = kvm;
2442         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2443
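        /*
         * Editorial note: fac_mask holds the facilities that may ever become
         * effective for this guest (host facilities limited to the known base
         * and extension lists), while fac_list is the default facility list
         * the guest starts with (base facilities only).  Userspace can later
         * replace fac_list via the CPU model attributes, but
         * test_kvm_facility() honours both.
         */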
2444         for (i = 0; i < kvm_s390_fac_size(); i++) {
2445                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2446                                               (kvm_s390_fac_base[i] |
2447                                                kvm_s390_fac_ext[i]);
2448                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2449                                               kvm_s390_fac_base[i];
2450         }
2451         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2452
2453         /* we are always in czam mode - even on pre z14 machines */
2454         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2455         set_kvm_facility(kvm->arch.model.fac_list, 138);
2456         /* we emulate STHYI in kvm */
2457         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2458         set_kvm_facility(kvm->arch.model.fac_list, 74);
2459         if (MACHINE_HAS_TLB_GUEST) {
2460                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2461                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2462         }
2463
2464         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2465         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2466
2467         kvm_s390_crypto_init(kvm);
2468
2469         mutex_init(&kvm->arch.float_int.ais_lock);
2470         spin_lock_init(&kvm->arch.float_int.lock);
2471         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2472                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2473         init_waitqueue_head(&kvm->arch.ipte_wq);
2474         mutex_init(&kvm->arch.ipte_mutex);
2475
2476         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2477         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2478
2479         if (type & KVM_VM_S390_UCONTROL) {
2480                 kvm->arch.gmap = NULL;
2481                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2482         } else {
2483                 if (sclp.hamax == U64_MAX)
2484                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2485                 else
2486                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2487                                                     sclp.hamax + 1);
2488                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2489                 if (!kvm->arch.gmap)
2490                         goto out_err;
2491                 kvm->arch.gmap->private = kvm;
2492                 kvm->arch.gmap->pfault_enabled = 0;
2493         }
2494
2495         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2496         kvm->arch.use_skf = sclp.has_skey;
2497         spin_lock_init(&kvm->arch.start_stop_lock);
2498         kvm_s390_vsie_init(kvm);
2499         kvm_s390_gisa_init(kvm);
2500         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2501
2502         return 0;
2503 out_err:
2504         free_page((unsigned long)kvm->arch.sie_page2);
2505         debug_unregister(kvm->arch.dbf);
2506         sca_dispose(kvm);
2507         KVM_EVENT(3, "creation of vm failed: %d", rc);
2508         return rc;
2509 }
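
/*
 * Illustrative sketch (editorial addition): the type argument checked above
 * is the value userspace passes to KVM_CREATE_VM.  A user-controlled VM
 * (KVM_VM_S390_UCONTROL, requiring CAP_SYS_ADMIN) would be requested as
 *
 *	vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL);
 *
 * while a plain ioctl(kvm_fd, KVM_CREATE_VM, 0) yields a normal VM.
 */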
2510
2511 bool kvm_arch_has_vcpu_debugfs(void)
2512 {
2513         return false;
2514 }
2515
2516 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2517 {
2518         return 0;
2519 }
2520
2521 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2522 {
2523         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2524         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2525         kvm_s390_clear_local_irqs(vcpu);
2526         kvm_clear_async_pf_completion_queue(vcpu);
2527         if (!kvm_is_ucontrol(vcpu->kvm))
2528                 sca_del_vcpu(vcpu);
2529
2530         if (kvm_is_ucontrol(vcpu->kvm))
2531                 gmap_remove(vcpu->arch.gmap);
2532
2533         if (vcpu->kvm->arch.use_cmma)
2534                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2535         free_page((unsigned long)(vcpu->arch.sie_block));
2536
2537         kvm_vcpu_uninit(vcpu);
2538         kmem_cache_free(kvm_vcpu_cache, vcpu);
2539 }
2540
2541 static void kvm_free_vcpus(struct kvm *kvm)
2542 {
2543         unsigned int i;
2544         struct kvm_vcpu *vcpu;
2545
2546         kvm_for_each_vcpu(i, vcpu, kvm)
2547                 kvm_arch_vcpu_destroy(vcpu);
2548
2549         mutex_lock(&kvm->lock);
2550         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2551                 kvm->vcpus[i] = NULL;
2552
2553         atomic_set(&kvm->online_vcpus, 0);
2554         mutex_unlock(&kvm->lock);
2555 }
2556
2557 void kvm_arch_destroy_vm(struct kvm *kvm)
2558 {
2559         kvm_free_vcpus(kvm);
2560         sca_dispose(kvm);
2561         debug_unregister(kvm->arch.dbf);
2562         kvm_s390_gisa_destroy(kvm);
2563         free_page((unsigned long)kvm->arch.sie_page2);
2564         if (!kvm_is_ucontrol(kvm))
2565                 gmap_remove(kvm->arch.gmap);
2566         kvm_s390_destroy_adapters(kvm);
2567         kvm_s390_clear_float_irqs(kvm);
2568         kvm_s390_vsie_destroy(kvm);
2569         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2570 }
2571
2572 /* Section: vcpu related */
2573 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2574 {
2575         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2576         if (!vcpu->arch.gmap)
2577                 return -ENOMEM;
2578         vcpu->arch.gmap->private = vcpu->kvm;
2579
2580         return 0;
2581 }
2582
2583 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2584 {
2585         if (!kvm_s390_use_sca_entries())
2586                 return;
2587         read_lock(&vcpu->kvm->arch.sca_lock);
2588         if (vcpu->kvm->arch.use_esca) {
2589                 struct esca_block *sca = vcpu->kvm->arch.sca;
2590
2591                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2592                 sca->cpu[vcpu->vcpu_id].sda = 0;
2593         } else {
2594                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2595
2596                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2597                 sca->cpu[vcpu->vcpu_id].sda = 0;
2598         }
2599         read_unlock(&vcpu->kvm->arch.sca_lock);
2600 }
2601
2602 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2603 {
2604         if (!kvm_s390_use_sca_entries()) {
2605                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2606
2607                 /* we still need the basic sca for the ipte control */
2608                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2609                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2610                 return;
2611         }
2612         read_lock(&vcpu->kvm->arch.sca_lock);
2613         if (vcpu->kvm->arch.use_esca) {
2614                 struct esca_block *sca = vcpu->kvm->arch.sca;
2615
2616                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2617                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2618                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2619                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2620                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2621         } else {
2622                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2623
2624                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2625                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2626                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2627                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2628         }
2629         read_unlock(&vcpu->kvm->arch.sca_lock);
2630 }
2631
2632 /* Basic SCA to Extended SCA data copy routines */
2633 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2634 {
2635         d->sda = s->sda;
2636         d->sigp_ctrl.c = s->sigp_ctrl.c;
2637         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2638 }
2639
2640 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2641 {
2642         int i;
2643
2644         d->ipte_control = s->ipte_control;
2645         d->mcn[0] = s->mcn;
2646         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2647                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2648 }
2649
2650 static int sca_switch_to_extended(struct kvm *kvm)
2651 {
2652         struct bsca_block *old_sca = kvm->arch.sca;
2653         struct esca_block *new_sca;
2654         struct kvm_vcpu *vcpu;
2655         unsigned int vcpu_idx;
2656         u32 scaol, scaoh;
2657
2658         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2659         if (!new_sca)
2660                 return -ENOMEM;
2661
2662         scaoh = (u32)((u64)(new_sca) >> 32);
2663         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2664
2665         kvm_s390_vcpu_block_all(kvm);
2666         write_lock(&kvm->arch.sca_lock);
2667
2668         sca_copy_b_to_e(new_sca, old_sca);
2669
2670         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2671                 vcpu->arch.sie_block->scaoh = scaoh;
2672                 vcpu->arch.sie_block->scaol = scaol;
2673                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2674         }
2675         kvm->arch.sca = new_sca;
2676         kvm->arch.use_esca = 1;
2677
2678         write_unlock(&kvm->arch.sca_lock);
2679         kvm_s390_vcpu_unblock_all(kvm);
2680
2681         free_page((unsigned long)old_sca);
2682
2683         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2684                  old_sca, kvm->arch.sca);
2685         return 0;
2686 }
2687
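/*
 * Check whether a vcpu with the given id can still be added. Ids beyond
 * the basic SCA slots require the extended SCA, so switch to it on demand
 * when the machine provides ESCA and 64-bit SCA origin support.
 */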
2688 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2689 {
2690         int rc;
2691
2692         if (!kvm_s390_use_sca_entries()) {
2693                 if (id < KVM_MAX_VCPUS)
2694                         return true;
2695                 return false;
2696         }
2697         if (id < KVM_S390_BSCA_CPU_SLOTS)
2698                 return true;
2699         if (!sclp.has_esca || !sclp.has_64bscao)
2700                 return false;
2701
2702         mutex_lock(&kvm->lock);
2703         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2704         mutex_unlock(&kvm->lock);
2705
2706         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2707 }
2708
2709 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2710 {
2711         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2712         kvm_clear_async_pf_completion_queue(vcpu);
2713         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2714                                     KVM_SYNC_GPRS |
2715                                     KVM_SYNC_ACRS |
2716                                     KVM_SYNC_CRS |
2717                                     KVM_SYNC_ARCH0 |
2718                                     KVM_SYNC_PFAULT;
2719         kvm_s390_set_prefix(vcpu, 0);
2720         if (test_kvm_facility(vcpu->kvm, 64))
2721                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2722         if (test_kvm_facility(vcpu->kvm, 82))
2723                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2724         if (test_kvm_facility(vcpu->kvm, 133))
2725                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2726         if (test_kvm_facility(vcpu->kvm, 156))
2727                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2728         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2729          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2730          */
2731         if (MACHINE_HAS_VX)
2732                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2733         else
2734                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2735
2736         if (kvm_is_ucontrol(vcpu->kvm))
2737                 return __kvm_ucontrol_vcpu_init(vcpu);
2738
2739         return 0;
2740 }
2741
2742 /* needs preemption disabled to protect from TOD sync and vcpu_load/put */
2743 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2744 {
2745         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2746         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2747         vcpu->arch.cputm_start = get_tod_clock_fast();
2748         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2749 }
2750
2751 /* needs preemption disabled to protect from TOD sync and vcpu_load/put */
2752 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2753 {
2754         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2755         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2756         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2757         vcpu->arch.cputm_start = 0;
2758         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2759 }
2760
2761 /* needs preemption disabled to protect from TOD sync and vcpu_load/put */
2762 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2763 {
2764         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2765         vcpu->arch.cputm_enabled = true;
2766         __start_cpu_timer_accounting(vcpu);
2767 }
2768
2769 /* needs preemption disabled to protect from TOD sync and vcpu_load/put */
2770 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2771 {
2772         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2773         __stop_cpu_timer_accounting(vcpu);
2774         vcpu->arch.cputm_enabled = false;
2775 }
2776
2777 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2778 {
2779         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2780         __enable_cpu_timer_accounting(vcpu);
2781         preempt_enable();
2782 }
2783
2784 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2785 {
2786         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2787         __disable_cpu_timer_accounting(vcpu);
2788         preempt_enable();
2789 }
2790
2791 /* set the cpu timer - may only be called from the VCPU thread itself */
2792 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2793 {
2794         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2795         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2796         if (vcpu->arch.cputm_enabled)
2797                 vcpu->arch.cputm_start = get_tod_clock_fast();
2798         vcpu->arch.sie_block->cputm = cputm;
2799         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2800         preempt_enable();
2801 }
2802
2803 /* update and get the cpu timer - can also be called from other VCPU threads */
2804 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2805 {
2806         unsigned int seq;
2807         __u64 value;
2808
2809         if (unlikely(!vcpu->arch.cputm_enabled))
2810                 return vcpu->arch.sie_block->cputm;
2811
2812         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2813         do {
2814                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2815                 /*
2816                  * If the writer would ever execute a read in the critical
2817                  * section, e.g. in irq context, we have a deadlock.
2818                  */
2819                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2820                 value = vcpu->arch.sie_block->cputm;
2821                 /* if cputm_start is 0, accounting is being started/stopped */
2822                 if (likely(vcpu->arch.cputm_start))
2823                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2824         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2825         preempt_enable();
2826         return value;
2827 }
2828
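/*
 * Make the vcpu current on this host cpu: enable its gmap, flag it as
 * running and resume CPU timer accounting if that is enabled.
 */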
2829 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2830 {
2831
2832         gmap_enable(vcpu->arch.enabled_gmap);
2833         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2834         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2835                 __start_cpu_timer_accounting(vcpu);
2836         vcpu->cpu = cpu;
2837 }
2838
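/*
 * Make the vcpu non-current: stop CPU timer accounting, clear the
 * running flag and disable the currently enabled gmap.
 */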
2839 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2840 {
2841         vcpu->cpu = -1;
2842         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2843                 __stop_cpu_timer_accounting(vcpu);
2844         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2845         vcpu->arch.enabled_gmap = gmap_get_enabled();
2846         gmap_disable(vcpu->arch.enabled_gmap);
2847
2848 }
2849
2850 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2851 {
2852         /* this equals initial cpu reset in the POP, but we don't switch to ESA */
2853         vcpu->arch.sie_block->gpsw.mask = 0UL;
2854         vcpu->arch.sie_block->gpsw.addr = 0UL;
2855         kvm_s390_set_prefix(vcpu, 0);
2856         kvm_s390_set_cpu_timer(vcpu, 0);
2857         vcpu->arch.sie_block->ckc       = 0UL;
2858         vcpu->arch.sie_block->todpr     = 0;
2859         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2860         vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2861                                         CR0_INTERRUPT_KEY_SUBMASK |
2862                                         CR0_MEASUREMENT_ALERT_SUBMASK;
2863         vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2864                                         CR14_UNUSED_33 |
2865                                         CR14_EXTERNAL_DAMAGE_SUBMASK;
2866         /* make sure the new fpc will be lazily loaded */
2867         save_fpu_regs();
2868         current->thread.fpu.fpc = 0;
2869         vcpu->arch.sie_block->gbea = 1;
2870         vcpu->arch.sie_block->pp = 0;
2871         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2872         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2873         kvm_clear_async_pf_completion_queue(vcpu);
2874         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2875                 kvm_s390_vcpu_stop(vcpu);
2876         kvm_s390_clear_local_irqs(vcpu);
2877 }
2878
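/*
 * Finish vcpu creation: inherit the VM-wide TOD epoch, attach the vcpu
 * to the VM gmap and the SCA (not for ucontrol VMs) and intercept the
 * operation exception when facility 74 or user_instr0 requires it.
 */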
2879 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2880 {
2881         mutex_lock(&vcpu->kvm->lock);
2882         preempt_disable();
2883         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2884         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2885         preempt_enable();
2886         mutex_unlock(&vcpu->kvm->lock);
2887         if (!kvm_is_ucontrol(vcpu->kvm)) {
2888                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2889                 sca_add_vcpu(vcpu);
2890         }
2891         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2892                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2893         /* make vcpu_load load the right gmap on the first trigger */
2894         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2895 }
2896
2897 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
2898 {
2899         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
2900             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
2901                 return true;
2902         return false;
2903 }
2904
2905 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
2906 {
2907         /* At least one ECC subfunction must be present */
2908         return kvm_has_pckmo_subfunc(kvm, 32) ||
2909                kvm_has_pckmo_subfunc(kvm, 33) ||
2910                kvm_has_pckmo_subfunc(kvm, 34) ||
2911                kvm_has_pckmo_subfunc(kvm, 40) ||
2912                kvm_has_pckmo_subfunc(kvm, 41);
2913
2914 }
2915
2916 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2917 {
2918         /*
2919          * If the AP instructions are not being interpreted and the MSAX3
2920          * facility is not configured for the guest, there is nothing to set up.
2921          */
2922         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2923                 return;
2924
2925         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2926         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2927         vcpu->arch.sie_block->eca &= ~ECA_APIE;
2928         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
2929
2930         if (vcpu->kvm->arch.crypto.apie)
2931                 vcpu->arch.sie_block->eca |= ECA_APIE;
2932
2933         /* Set up protected key support */
2934         if (vcpu->kvm->arch.crypto.aes_kw) {
2935                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2936                 /* ECC is also wrapped with the AES key */
2937                 if (kvm_has_pckmo_ecc(vcpu->kvm))
2938                         vcpu->arch.sie_block->ecd |= ECD_ECC;
2939         }
2940
2941         if (vcpu->kvm->arch.crypto.dea_kw)
2942                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2943 }
2944
2945 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2946 {
2947         free_page(vcpu->arch.sie_block->cbrlo);
2948         vcpu->arch.sie_block->cbrlo = 0;
2949 }
2950
2951 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2952 {
2953         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2954         if (!vcpu->arch.sie_block->cbrlo)
2955                 return -ENOMEM;
2956         return 0;
2957 }
2958
2959 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2960 {
2961         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2962
2963         vcpu->arch.sie_block->ibc = model->ibc;
2964         if (test_kvm_facility(vcpu->kvm, 7))
2965                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2966 }
2967
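/*
 * Initial setup of the SIE control block: set the base cpu state, apply
 * the cpu model and enable the execution controls and interpretation
 * facilities permitted by the configured cpu facilities and SCLP features.
 * Also allocates the CMMA buffer if needed and initializes the wakeup
 * timer used for the clock comparator and the crypto state.
 */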
2968 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2969 {
2970         int rc = 0;
2971
2972         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2973                                                     CPUSTAT_SM |
2974                                                     CPUSTAT_STOPPED);
2975
2976         if (test_kvm_facility(vcpu->kvm, 78))
2977                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2978         else if (test_kvm_facility(vcpu->kvm, 8))
2979                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2980
2981         kvm_s390_vcpu_setup_model(vcpu);
2982
2983         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2984         if (MACHINE_HAS_ESOP)
2985                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2986         if (test_kvm_facility(vcpu->kvm, 9))
2987                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2988         if (test_kvm_facility(vcpu->kvm, 73))
2989                 vcpu->arch.sie_block->ecb |= ECB_TE;
2990
2991         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2992                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2993         if (test_kvm_facility(vcpu->kvm, 130))
2994                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2995         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2996         if (sclp.has_cei)
2997                 vcpu->arch.sie_block->eca |= ECA_CEI;
2998         if (sclp.has_ib)
2999                 vcpu->arch.sie_block->eca |= ECA_IB;
3000         if (sclp.has_siif)
3001                 vcpu->arch.sie_block->eca |= ECA_SII;
3002         if (sclp.has_sigpif)
3003                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3004         if (test_kvm_facility(vcpu->kvm, 129)) {
3005                 vcpu->arch.sie_block->eca |= ECA_VX;
3006                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3007         }
3008         if (test_kvm_facility(vcpu->kvm, 139))
3009                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3010         if (test_kvm_facility(vcpu->kvm, 156))
3011                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3012         if (vcpu->arch.sie_block->gd) {
3013                 vcpu->arch.sie_block->eca |= ECA_AIV;
3014                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3015                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3016         }
3017         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3018                                         | SDNXC;
3019         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3020
3021         if (sclp.has_kss)
3022                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3023         else
3024                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3025
3026         if (vcpu->kvm->arch.use_cmma) {
3027                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3028                 if (rc)
3029                         return rc;
3030         }
3031         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3032         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3033
3034         vcpu->arch.sie_block->hpid = HPID_KVM;
3035
3036         kvm_s390_vcpu_crypto_setup(vcpu);
3037
3038         return rc;
3039 }
3040
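/*
 * Allocate and set up a new vcpu: the vcpu structure, the SIE page with
 * the SIE control block and ITDB, the guest memory limit and the GISA
 * origin, then register the vcpu with the generic KVM code.
 */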
3041 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
3042                                       unsigned int id)
3043 {
3044         struct kvm_vcpu *vcpu;
3045         struct sie_page *sie_page;
3046         int rc = -EINVAL;
3047
3048         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3049                 goto out;
3050
3051         rc = -ENOMEM;
3052
3053         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
3054         if (!vcpu)
3055                 goto out;
3056
3057         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3058         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3059         if (!sie_page)
3060                 goto out_free_cpu;
3061
3062         vcpu->arch.sie_block = &sie_page->sie_block;
3063         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3064
3065         /* the real guest size will always be smaller than msl */
3066         vcpu->arch.sie_block->mso = 0;
3067         vcpu->arch.sie_block->msl = sclp.hamax;
3068
3069         vcpu->arch.sie_block->icpua = id;
3070         spin_lock_init(&vcpu->arch.local_int.lock);
3071         vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin;
3072         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3073                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3074         seqcount_init(&vcpu->arch.cputm_seqcount);
3075
3076         rc = kvm_vcpu_init(vcpu, kvm, id);
3077         if (rc)
3078                 goto out_free_sie_block;
3079         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
3080                  vcpu->arch.sie_block);
3081         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
3082
3083         return vcpu;
3084 out_free_sie_block:
3085         free_page((unsigned long)(vcpu->arch.sie_block));
3086 out_free_cpu:
3087         kmem_cache_free(kvm_vcpu_cache, vcpu);
3088 out:
3089         return ERR_PTR(rc);
3090 }
3091
3092 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3093 {
3094         return kvm_s390_vcpu_has_irq(vcpu, 0);
3095 }
3096
3097 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3098 {
3099         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3100 }
3101
3102 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3103 {
3104         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3105         exit_sie(vcpu);
3106 }
3107
3108 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3109 {
3110         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3111 }
3112
3113 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3114 {
3115         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3116         exit_sie(vcpu);
3117 }
3118
3119 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3120 {
3121         return atomic_read(&vcpu->arch.sie_block->prog20) &
3122                (PROG_BLOCK_SIE | PROG_REQUEST);
3123 }
3124
3125 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3126 {
3127         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3128 }
3129
3130 /*
3131  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3132  * If the CPU is not running (e.g. waiting as idle) it returns immediately.
3133  */
3134 void exit_sie(struct kvm_vcpu *vcpu)
3135 {
3136         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3137         kvm_s390_vsie_kick(vcpu);
3138         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3139                 cpu_relax();
3140 }
3141
3142 /* Kick a guest cpu out of SIE to process a request synchronously */
3143 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3144 {
3145         kvm_make_request(req, vcpu);
3146         kvm_s390_vcpu_request(vcpu);
3147 }
3148
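/*
 * Notifier for gmap invalidations. Shadow gmaps are ignored. For ranges
 * that may contain a prefix page (below 2 GB), every vcpu whose prefix
 * area intersects the range gets a KVM_REQ_MMU_RELOAD so that the prefix
 * mapping is re-established before the next SIE entry.
 */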
3149 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3150                               unsigned long end)
3151 {
3152         struct kvm *kvm = gmap->private;
3153         struct kvm_vcpu *vcpu;
3154         unsigned long prefix;
3155         int i;
3156
3157         if (gmap_is_shadow(gmap))
3158                 return;
3159         if (start >= 1UL << 31)
3160                 /* We are only interested in prefix pages */
3161                 return;
3162         kvm_for_each_vcpu(i, vcpu, kvm) {
3163                 /* match against both prefix pages */
3164                 prefix = kvm_s390_get_prefix(vcpu);
3165                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3166                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3167                                    start, end);
3168                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3169                 }
3170         }
3171 }
3172
3173 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3174 {
3175         /* do not poll with more than halt_poll_max_steal percent of steal time */
3176         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3177             halt_poll_max_steal) {
3178                 vcpu->stat.halt_no_poll_steal++;
3179                 return true;
3180         }
3181         return false;
3182 }
3183
3184 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3185 {
3186         /* kvm common code refers to this, but never calls it */
3187         BUG();
3188         return 0;
3189 }
3190
3191 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3192                                            struct kvm_one_reg *reg)
3193 {
3194         int r = -EINVAL;
3195
3196         switch (reg->id) {
3197         case KVM_REG_S390_TODPR:
3198                 r = put_user(vcpu->arch.sie_block->todpr,
3199                              (u32 __user *)reg->addr);
3200                 break;
3201         case KVM_REG_S390_EPOCHDIFF:
3202                 r = put_user(vcpu->arch.sie_block->epoch,
3203                              (u64 __user *)reg->addr);
3204                 break;
3205         case KVM_REG_S390_CPU_TIMER:
3206                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3207                              (u64 __user *)reg->addr);
3208                 break;
3209         case KVM_REG_S390_CLOCK_COMP:
3210                 r = put_user(vcpu->arch.sie_block->ckc,
3211                              (u64 __user *)reg->addr);
3212                 break;
3213         case KVM_REG_S390_PFTOKEN:
3214                 r = put_user(vcpu->arch.pfault_token,
3215                              (u64 __user *)reg->addr);
3216                 break;
3217         case KVM_REG_S390_PFCOMPARE:
3218                 r = put_user(vcpu->arch.pfault_compare,
3219                              (u64 __user *)reg->addr);
3220                 break;
3221         case KVM_REG_S390_PFSELECT:
3222                 r = put_user(vcpu->arch.pfault_select,
3223                              (u64 __user *)reg->addr);
3224                 break;
3225         case KVM_REG_S390_PP:
3226                 r = put_user(vcpu->arch.sie_block->pp,
3227                              (u64 __user *)reg->addr);
3228                 break;
3229         case KVM_REG_S390_GBEA:
3230                 r = put_user(vcpu->arch.sie_block->gbea,
3231                              (u64 __user *)reg->addr);
3232                 break;
3233         default:
3234                 break;
3235         }
3236
3237         return r;
3238 }
3239
3240 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3241                                            struct kvm_one_reg *reg)
3242 {
3243         int r = -EINVAL;
3244         __u64 val;
3245
3246         switch (reg->id) {
3247         case KVM_REG_S390_TODPR:
3248                 r = get_user(vcpu->arch.sie_block->todpr,
3249                              (u32 __user *)reg->addr);
3250                 break;
3251         case KVM_REG_S390_EPOCHDIFF:
3252                 r = get_user(vcpu->arch.sie_block->epoch,
3253                              (u64 __user *)reg->addr);
3254                 break;
3255         case KVM_REG_S390_CPU_TIMER:
3256                 r = get_user(val, (u64 __user *)reg->addr);
3257                 if (!r)
3258                         kvm_s390_set_cpu_timer(vcpu, val);
3259                 break;
3260         case KVM_REG_S390_CLOCK_COMP:
3261                 r = get_user(vcpu->arch.sie_block->ckc,
3262                              (u64 __user *)reg->addr);
3263                 break;
3264         case KVM_REG_S390_PFTOKEN:
3265                 r = get_user(vcpu->arch.pfault_token,
3266                              (u64 __user *)reg->addr);
3267                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3268                         kvm_clear_async_pf_completion_queue(vcpu);
3269                 break;
3270         case KVM_REG_S390_PFCOMPARE:
3271                 r = get_user(vcpu->arch.pfault_compare,
3272                              (u64 __user *)reg->addr);
3273                 break;
3274         case KVM_REG_S390_PFSELECT:
3275                 r = get_user(vcpu->arch.pfault_select,
3276                              (u64 __user *)reg->addr);
3277                 break;
3278         case KVM_REG_S390_PP:
3279                 r = get_user(vcpu->arch.sie_block->pp,
3280                              (u64 __user *)reg->addr);
3281                 break;
3282         case KVM_REG_S390_GBEA:
3283                 r = get_user(vcpu->arch.sie_block->gbea,
3284                              (u64 __user *)reg->addr);
3285                 break;
3286         default:
3287                 break;
3288         }
3289
3290         return r;
3291 }
3292
3293 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3294 {
3295         kvm_s390_vcpu_initial_reset(vcpu);
3296         return 0;
3297 }
3298
3299 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3300 {
3301         vcpu_load(vcpu);
3302         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3303         vcpu_put(vcpu);
3304         return 0;
3305 }
3306
3307 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3308 {
3309         vcpu_load(vcpu);
3310         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3311         vcpu_put(vcpu);
3312         return 0;
3313 }
3314
3315 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3316                                   struct kvm_sregs *sregs)
3317 {
3318         vcpu_load(vcpu);
3319
3320         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3321         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3322
3323         vcpu_put(vcpu);
3324         return 0;
3325 }
3326
3327 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3328                                   struct kvm_sregs *sregs)
3329 {
3330         vcpu_load(vcpu);
3331
3332         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3333         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3334
3335         vcpu_put(vcpu);
3336         return 0;
3337 }
3338
3339 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3340 {
3341         int ret = 0;
3342
3343         vcpu_load(vcpu);
3344
3345         if (test_fp_ctl(fpu->fpc)) {
3346                 ret = -EINVAL;
3347                 goto out;
3348         }
3349         vcpu->run->s.regs.fpc = fpu->fpc;
3350         if (MACHINE_HAS_VX)
3351                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3352                                  (freg_t *) fpu->fprs);
3353         else
3354                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3355
3356 out:
3357         vcpu_put(vcpu);
3358         return ret;
3359 }
3360
3361 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3362 {
3363         vcpu_load(vcpu);
3364
3365         /* make sure we have the latest values */
3366         save_fpu_regs();
3367         if (MACHINE_HAS_VX)
3368                 convert_vx_to_fp((freg_t *) fpu->fprs,
3369                                  (__vector128 *) vcpu->run->s.regs.vrs);
3370         else
3371                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3372         fpu->fpc = vcpu->run->s.regs.fpc;
3373
3374         vcpu_put(vcpu);
3375         return 0;
3376 }
3377
3378 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3379 {
3380         int rc = 0;
3381
3382         if (!is_vcpu_stopped(vcpu))
3383                 rc = -EBUSY;
3384         else {
3385                 vcpu->run->psw_mask = psw.mask;
3386                 vcpu->run->psw_addr = psw.addr;
3387         }
3388         return rc;
3389 }
3390
3391 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3392                                   struct kvm_translation *tr)
3393 {
3394         return -EINVAL; /* not implemented yet */
3395 }
3396
3397 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3398                               KVM_GUESTDBG_USE_HW_BP | \
3399                               KVM_GUESTDBG_ENABLE)
3400
3401 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3402                                         struct kvm_guest_debug *dbg)
3403 {
3404         int rc = 0;
3405
3406         vcpu_load(vcpu);
3407
3408         vcpu->guest_debug = 0;
3409         kvm_s390_clear_bp_data(vcpu);
3410
3411         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3412                 rc = -EINVAL;
3413                 goto out;
3414         }
3415         if (!sclp.has_gpere) {
3416                 rc = -EINVAL;
3417                 goto out;
3418         }
3419
3420         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3421                 vcpu->guest_debug = dbg->control;
3422                 /* enforce guest PER */
3423                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3424
3425                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3426                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3427         } else {
3428                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3429                 vcpu->arch.guestdbg.last_bp = 0;
3430         }
3431
3432         if (rc) {
3433                 vcpu->guest_debug = 0;
3434                 kvm_s390_clear_bp_data(vcpu);
3435                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3436         }
3437
3438 out:
3439         vcpu_put(vcpu);
3440         return rc;
3441 }
3442
3443 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3444                                     struct kvm_mp_state *mp_state)
3445 {
3446         int ret;
3447
3448         vcpu_load(vcpu);
3449
3450         /* CHECK_STOP and LOAD are not supported yet */
3451         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3452                                       KVM_MP_STATE_OPERATING;
3453
3454         vcpu_put(vcpu);
3455         return ret;
3456 }
3457
3458 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3459                                     struct kvm_mp_state *mp_state)
3460 {
3461         int rc = 0;
3462
3463         vcpu_load(vcpu);
3464
3465         /* user space knows about this interface - let it control the state */
3466         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3467
3468         switch (mp_state->mp_state) {
3469         case KVM_MP_STATE_STOPPED:
3470                 kvm_s390_vcpu_stop(vcpu);
3471                 break;
3472         case KVM_MP_STATE_OPERATING:
3473                 kvm_s390_vcpu_start(vcpu);
3474                 break;
3475         case KVM_MP_STATE_LOAD:
3476         case KVM_MP_STATE_CHECK_STOP:
3477                 /* fall through - CHECK_STOP and LOAD are not supported yet */
3478         default:
3479                 rc = -ENXIO;
3480         }
3481
3482         vcpu_put(vcpu);
3483         return rc;
3484 }
3485
3486 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3487 {
3488         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3489 }
3490
3491 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3492 {
3493 retry:
3494         kvm_s390_vcpu_request_handled(vcpu);
3495         if (!kvm_request_pending(vcpu))
3496                 return 0;
3497         /*
3498          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3499          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3500          * This ensures that the ipte instruction for this request has
3501          * already finished. We might race against a second unmapper that
3502          * wants to set the blocking bit. Let's just retry the request loop.
3503          */
3504         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3505                 int rc;
3506                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3507                                           kvm_s390_get_prefix(vcpu),
3508                                           PAGE_SIZE * 2, PROT_WRITE);
3509                 if (rc) {
3510                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3511                         return rc;
3512                 }
3513                 goto retry;
3514         }
3515
3516         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3517                 vcpu->arch.sie_block->ihcpu = 0xffff;
3518                 goto retry;
3519         }
3520
3521         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3522                 if (!ibs_enabled(vcpu)) {
3523                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3524                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3525                 }
3526                 goto retry;
3527         }
3528
3529         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3530                 if (ibs_enabled(vcpu)) {
3531                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3532                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3533                 }
3534                 goto retry;
3535         }
3536
3537         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3538                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3539                 goto retry;
3540         }
3541
3542         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3543                 /*
3544                  * Disable CMM virtualization; we will emulate the ESSA
3545                  * instruction manually, in order to provide additional
3546                  * functionalities needed for live migration.
3547                  */
3548                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3549                 goto retry;
3550         }
3551
3552         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3553                 /*
3554                  * Re-enable CMM virtualization if CMMA is available and
3555                  * CMM has been used.
3556                  */
3557                 if ((vcpu->kvm->arch.use_cmma) &&
3558                     (vcpu->kvm->mm->context.uses_cmm))
3559                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3560                 goto retry;
3561         }
3562
3563         /* nothing to do, just clear the request */
3564         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3565         /* we left the vsie handler, nothing to do, just clear the request */
3566         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3567
3568         return 0;
3569 }
3570
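/*
 * Set the guest TOD clock: compute the epoch (and, if facility 139 is
 * available, the epoch index) from the requested value and the current
 * host TOD, and propagate it to all vcpus while they are blocked.
 */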
3571 void kvm_s390_set_tod_clock(struct kvm *kvm,
3572                             const struct kvm_s390_vm_tod_clock *gtod)
3573 {
3574         struct kvm_vcpu *vcpu;
3575         struct kvm_s390_tod_clock_ext htod;
3576         int i;
3577
3578         mutex_lock(&kvm->lock);
3579         preempt_disable();
3580
3581         get_tod_clock_ext((char *)&htod);
3582
3583         kvm->arch.epoch = gtod->tod - htod.tod;
3584         kvm->arch.epdx = 0;
3585         if (test_kvm_facility(kvm, 139)) {
3586                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3587                 if (kvm->arch.epoch > gtod->tod)
3588                         kvm->arch.epdx -= 1;
3589         }
3590
3591         kvm_s390_vcpu_block_all(kvm);
3592         kvm_for_each_vcpu(i, vcpu, kvm) {
3593                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3594                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3595         }
3596
3597         kvm_s390_vcpu_unblock_all(kvm);
3598         preempt_enable();
3599         mutex_unlock(&kvm->lock);
3600 }
3601
3602 /**
3603  * kvm_arch_fault_in_page - fault-in guest page if necessary
3604  * @vcpu: The corresponding virtual cpu
3605  * @gpa: Guest physical address
3606  * @writable: Whether the page should be writable or not
3607  *
3608  * Make sure that a guest page has been faulted-in on the host.
3609  *
3610  * Return: Zero on success, negative error code otherwise.
3611  */
3612 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3613 {
3614         return gmap_fault(vcpu->arch.gmap, gpa,
3615                           writable ? FAULT_FLAG_WRITE : 0);
3616 }
3617
3618 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3619                                       unsigned long token)
3620 {
3621         struct kvm_s390_interrupt inti;
3622         struct kvm_s390_irq irq;
3623
3624         if (start_token) {
3625                 irq.u.ext.ext_params2 = token;
3626                 irq.type = KVM_S390_INT_PFAULT_INIT;
3627                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3628         } else {
3629                 inti.type = KVM_S390_INT_PFAULT_DONE;
3630                 inti.parm64 = token;
3631                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3632         }
3633 }
3634
3635 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3636                                      struct kvm_async_pf *work)
3637 {
3638         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3639         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3640 }
3641
3642 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3643                                  struct kvm_async_pf *work)
3644 {
3645         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3646         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3647 }
3648
3649 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3650                                struct kvm_async_pf *work)
3651 {
3652         /* s390 will always inject the page directly */
3653 }
3654
3655 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3656 {
3657         /*
3658          * s390 will always inject the page directly,
3659          * but we still want check_async_completion to cleanup
3660          */
3661         return true;
3662 }
3663
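/*
 * Try to queue an async page fault for the host address that just
 * faulted. This is only done if the guest has pfault enabled, no
 * interrupt is already pending, the PSW and CR0 allow the PFAULT_INIT
 * external interrupt and the pfault token can be read from guest memory.
 */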
3664 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3665 {
3666         hva_t hva;
3667         struct kvm_arch_async_pf arch;
3668         int rc;
3669
3670         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3671                 return 0;
3672         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3673             vcpu->arch.pfault_compare)
3674                 return 0;
3675         if (psw_extint_disabled(vcpu))
3676                 return 0;
3677         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3678                 return 0;
3679         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3680                 return 0;
3681         if (!vcpu->arch.gmap->pfault_enabled)
3682                 return 0;
3683
3684         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3685         hva += current->thread.gmap_addr & ~PAGE_MASK;
3686         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3687                 return 0;
3688
3689         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3690         return rc;
3691 }
3692
3693 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3694 {
3695         int rc, cpuflags;
3696
3697         /*
3698          * On s390 notifications for arriving pages will be delivered directly
3699          * to the guest but the housekeeping for completed pfaults is
3700          * handled outside the worker.
3701          */
3702         kvm_check_async_pf_completion(vcpu);
3703
3704         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3705         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3706
3707         if (need_resched())
3708                 schedule();
3709
3710         if (test_cpu_flag(CIF_MCCK_PENDING))
3711                 s390_handle_mcck();
3712
3713         if (!kvm_is_ucontrol(vcpu->kvm)) {
3714                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3715                 if (rc)
3716                         return rc;
3717         }
3718
3719         rc = kvm_s390_handle_requests(vcpu);
3720         if (rc)
3721                 return rc;
3722
3723         if (guestdbg_enabled(vcpu)) {
3724                 kvm_s390_backup_guest_per_regs(vcpu);
3725                 kvm_s390_patch_guest_per_regs(vcpu);
3726         }
3727
3728         clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
3729
3730         vcpu->arch.sie_block->icptcode = 0;
3731         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3732         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3733         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3734
3735         return 0;
3736 }
3737
3738 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3739 {
3740         struct kvm_s390_pgm_info pgm_info = {
3741                 .code = PGM_ADDRESSING,
3742         };
3743         u8 opcode, ilen;
3744         int rc;
3745
3746         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3747         trace_kvm_s390_sie_fault(vcpu);
3748
3749         /*
3750          * We want to inject an addressing exception, which is defined as a
3751          * suppressing or terminating exception. However, since we came here
3752          * by a DAT access exception, the PSW still points to the faulting
3753          * instruction since DAT exceptions are nullifying. So we've got
3754          * to look up the current opcode to get the length of the instruction
3755          * to be able to forward the PSW.
3756          */
3757         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3758         ilen = insn_length(opcode);
3759         if (rc < 0) {
3760                 return rc;
3761         } else if (rc) {
3762                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3763                  * Forward by arbitrary ilc, injection will take care of
3764                  * nullification if necessary.
3765                  */
3766                 pgm_info = vcpu->arch.pgm;
3767                 ilen = 4;
3768         }
3769         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3770         kvm_s390_forward_psw(vcpu, ilen);
3771         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3772 }
3773
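/*
 * Post-process a SIE exit: reinject host machine checks signalled via
 * -EINTR, hand real intercepts to the intercept handlers, and turn guest
 * faults into async pfaults or a synchronous fault-in. Returns -EREMOTE
 * when the exit has to be completed by userspace.
 */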
3774 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3775 {
3776         struct mcck_volatile_info *mcck_info;
3777         struct sie_page *sie_page;
3778
3779         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3780                    vcpu->arch.sie_block->icptcode);
3781         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3782
3783         if (guestdbg_enabled(vcpu))
3784                 kvm_s390_restore_guest_per_regs(vcpu);
3785
3786         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3787         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3788
3789         if (exit_reason == -EINTR) {
3790                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3791                 sie_page = container_of(vcpu->arch.sie_block,
3792                                         struct sie_page, sie_block);
3793                 mcck_info = &sie_page->mcck_info;
3794                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3795                 return 0;
3796         }
3797
3798         if (vcpu->arch.sie_block->icptcode > 0) {
3799                 int rc = kvm_handle_sie_intercept(vcpu);
3800
3801                 if (rc != -EOPNOTSUPP)
3802                         return rc;
3803                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3804                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3805                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3806                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3807                 return -EREMOTE;
3808         } else if (exit_reason != -EFAULT) {
3809                 vcpu->stat.exit_null++;
3810                 return 0;
3811         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3812                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3813                 vcpu->run->s390_ucontrol.trans_exc_code =
3814                                                 current->thread.gmap_addr;
3815                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3816                 return -EREMOTE;
3817         } else if (current->thread.gmap_pfault) {
3818                 trace_kvm_s390_major_guest_pfault(vcpu);
3819                 current->thread.gmap_pfault = 0;
3820                 if (kvm_arch_setup_async_pf(vcpu))
3821                         return 0;
3822                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3823         }
3824         return vcpu_post_run_fault_in_sie(vcpu);
3825 }
3826
3827 static int __vcpu_run(struct kvm_vcpu *vcpu)
3828 {
3829         int rc, exit_reason;
3830
3831         /*
3832          * We try to hold kvm->srcu during most of vcpu_run (except when
3833          * running the guest), so that memslots and other data are protected
3834          */
3835         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3836
3837         do {
3838                 rc = vcpu_pre_run(vcpu);
3839                 if (rc)
3840                         break;
3841
3842                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3843                 /*
3844                  * As PF_VCPU will be used in the fault handler, there must be
3845                  * no uaccess between guest_enter and guest_exit.
3846                  */
3847                 local_irq_disable();
3848                 guest_enter_irqoff();
3849                 __disable_cpu_timer_accounting(vcpu);
3850                 local_irq_enable();
3851                 exit_reason = sie64a(vcpu->arch.sie_block,
3852                                      vcpu->run->s.regs.gprs);
3853                 local_irq_disable();
3854                 __enable_cpu_timer_accounting(vcpu);
3855                 guest_exit_irqoff();
3856                 local_irq_enable();
3857                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3858
3859                 rc = vcpu_post_run(vcpu, exit_reason);
3860         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3861
3862         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3863         return rc;
3864 }
3865
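/*
 * Transfer the register state that userspace marked dirty from kvm_run
 * into the vcpu and its SIE control block, enable lazily enabled
 * facilities (RI, GS) when valid state is provided for them, and switch
 * the floating point/vector, access and guarded storage registers from
 * the host to the guest values for the upcoming SIE entries.
 */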
3866 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3867 {
3868         struct runtime_instr_cb *riccb;
3869         struct gs_cb *gscb;
3870
3871         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3872         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3873         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3874         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3875         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3876                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3877         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3878                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3879                 /* some control register changes require a tlb flush */
3880                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3881         }
3882         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3883                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3884                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3885                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3886                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3887                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3888         }
3889         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3890                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3891                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3892                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3893                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3894                         kvm_clear_async_pf_completion_queue(vcpu);
3895         }
3896         /*
3897          * If userspace sets the riccb (e.g. after migration) to a valid state,
3898          * we should enable RI here instead of doing the lazy enablement.
3899          */
3900         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3901             test_kvm_facility(vcpu->kvm, 64) &&
3902             riccb->v &&
3903             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3904                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3905                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3906         }
3907         /*
3908          * If userspace sets the gscb (e.g. after migration) to non-zero,
3909          * we should enable GS here instead of doing the lazy enablement.
3910          */
3911         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3912             test_kvm_facility(vcpu->kvm, 133) &&
3913             gscb->gssm &&
3914             !vcpu->arch.gs_enabled) {
3915                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3916                 vcpu->arch.sie_block->ecb |= ECB_GS;
3917                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3918                 vcpu->arch.gs_enabled = 1;
3919         }
3920         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3921             test_kvm_facility(vcpu->kvm, 82)) {
3922                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3923                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3924         }
3925         save_access_regs(vcpu->arch.host_acrs);
3926         restore_access_regs(vcpu->run->s.regs.acrs);
3927         /* save host (userspace) fprs/vrs */
3928         save_fpu_regs();
3929         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3930         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3931         if (MACHINE_HAS_VX)
3932                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3933         else
3934                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3935         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3936         if (test_fp_ctl(current->thread.fpu.fpc))
3937                 /* User space provided an invalid FPC, let's clear it */
3938                 current->thread.fpu.fpc = 0;
3939         if (MACHINE_HAS_GS) {
3940                 preempt_disable();
3941                 __ctl_set_bit(2, 4);
3942                 if (current->thread.gs_cb) {
3943                         vcpu->arch.host_gscb = current->thread.gs_cb;
3944                         save_gs_cb(vcpu->arch.host_gscb);
3945                 }
3946                 if (vcpu->arch.gs_enabled) {
3947                         current->thread.gs_cb = (struct gs_cb *)
3948                                                 &vcpu->run->s.regs.gscb;
3949                         restore_gs_cb(current->thread.gs_cb);
3950                 }
3951                 preempt_enable();
3952         }
3953         /* SIE will load etoken directly from SDNX and therefore kvm_run */
3954
3955         kvm_run->kvm_dirty_regs = 0;
3956 }
3957
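/*
 * Counterpart of sync_regs(): copy the guest register state back into
 * kvm_run and restore the host floating point/vector, access and guarded
 * storage registers saved before entering SIE.
 */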
3958 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3959 {
3960         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3961         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3962         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3963         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3964         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3965         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3966         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3967         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3968         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3969         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3970         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3971         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3972         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3973         save_access_regs(vcpu->run->s.regs.acrs);
3974         restore_access_regs(vcpu->arch.host_acrs);
3975         /* Save guest register state */
3976         save_fpu_regs();
3977         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3978         /* Restore will be done lazily at return */
3979         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3980         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3981         if (MACHINE_HAS_GS) {
3982                 __ctl_set_bit(2, 4);
3983                 if (vcpu->arch.gs_enabled)
3984                         save_gs_cb(current->thread.gs_cb);
3985                 preempt_disable();
3986                 current->thread.gs_cb = vcpu->arch.host_gscb;
3987                 restore_gs_cb(vcpu->arch.host_gscb);
3988                 preempt_enable();
3989                 if (!vcpu->arch.host_gscb)
3990                         __ctl_clear_bit(2, 4);
3991                 vcpu->arch.host_gscb = NULL;
3992         }
3993         /* SIE will save etoken directly into SDNX and therefore kvm_run */
3994 }
3995
3996 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3997 {
3998         int rc;
3999
4000         if (kvm_run->immediate_exit)
4001                 return -EINTR;
4002
4003         vcpu_load(vcpu);
4004
4005         if (guestdbg_exit_pending(vcpu)) {
4006                 kvm_s390_prepare_debug_exit(vcpu);
4007                 rc = 0;
4008                 goto out;
4009         }
4010
4011         kvm_sigset_activate(vcpu);
4012
4013         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4014                 kvm_s390_vcpu_start(vcpu);
4015         } else if (is_vcpu_stopped(vcpu)) {
4016                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4017                                    vcpu->vcpu_id);
4018                 rc = -EINVAL;
4019                 goto out;
4020         }
4021
4022         sync_regs(vcpu, kvm_run);
4023         enable_cpu_timer_accounting(vcpu);
4024
4025         might_fault();
4026         rc = __vcpu_run(vcpu);
4027
4028         if (signal_pending(current) && !rc) {
4029                 kvm_run->exit_reason = KVM_EXIT_INTR;
4030                 rc = -EINTR;
4031         }
4032
4033         if (guestdbg_exit_pending(vcpu) && !rc)  {
4034                 kvm_s390_prepare_debug_exit(vcpu);
4035                 rc = 0;
4036         }
4037
4038         if (rc == -EREMOTE) {
4039                 /* userspace support is needed, kvm_run has been prepared */
4040                 rc = 0;
4041         }
4042
4043         disable_cpu_timer_accounting(vcpu);
4044         store_regs(vcpu, kvm_run);
4045
4046         kvm_sigset_deactivate(vcpu);
4047
4048         vcpu->stat.exit_userspace++;
4049 out:
4050         vcpu_put(vcpu);
4051         return rc;
4052 }
4053
4054 /*
4055  * store status at address
4056  * we have two special cases:
4057  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4058  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4059  */
4060 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4061 {
4062         unsigned char archmode = 1;
4063         freg_t fprs[NUM_FPRS];
4064         unsigned int px;
4065         u64 clkcomp, cputm;
4066         int rc;
4067
4068         px = kvm_s390_get_prefix(vcpu);
4069         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4070                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4071                         return -EFAULT;
4072                 gpa = 0;
4073         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4074                 if (write_guest_real(vcpu, 163, &archmode, 1))
4075                         return -EFAULT;
4076                 gpa = px;
4077         } else
4078                 gpa -= __LC_FPREGS_SAVE_AREA;
4079
4080         /* manually convert vector registers if necessary */
4081         if (MACHINE_HAS_VX) {
4082                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4083                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4084                                      fprs, 128);
4085         } else {
4086                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4087                                      vcpu->run->s.regs.fprs, 128);
4088         }
4089         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4090                               vcpu->run->s.regs.gprs, 128);
4091         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4092                               &vcpu->arch.sie_block->gpsw, 16);
4093         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4094                               &px, 4);
4095         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4096                               &vcpu->run->s.regs.fpc, 4);
4097         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4098                               &vcpu->arch.sie_block->todpr, 4);
4099         cputm = kvm_s390_get_cpu_timer(vcpu);
4100         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4101                               &cputm, 8);
4102         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4103         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4104                               &clkcomp, 8);
4105         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4106                               &vcpu->run->s.regs.acrs, 64);
4107         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4108                               &vcpu->arch.sie_block->gcr, 128);
4109         return rc ? -EFAULT : 0;
4110 }
4111
4112 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4113 {
4114         /*
4115          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4116          * switch in the run ioctl. Let's update our copies before we store
4117          * them in the save area.
4118          */
4119         save_fpu_regs();
4120         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4121         save_access_regs(vcpu->run->s.regs.acrs);
4122
4123         return kvm_s390_store_status_unloaded(vcpu, addr);
4124 }
4125
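/*
 * Cancel any still pending ENABLE_IBS request and ask the vcpu to give
 * up the IBS facility via a synchronous DISABLE_IBS request.
 */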
4126 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4127 {
4128         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4129         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4130 }
4131
4132 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4133 {
4134         unsigned int i;
4135         struct kvm_vcpu *vcpu;
4136
4137         kvm_for_each_vcpu(i, vcpu, kvm) {
4138                 __disable_ibs_on_vcpu(vcpu);
4139         }
4140 }
4141
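/*
 * Request the IBS facility for this vcpu, cancelling any pending
 * DISABLE_IBS request first; this is a no-op on machines without IBS
 * (!sclp.has_ibs).
 */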
4142 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4143 {
4144         if (!sclp.has_ibs)
4145                 return;
4146         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4147         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4148 }
4149
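/*
 * Move a vcpu out of the STOPPED state. If it becomes the only started
 * vcpu, IBS is enabled for it; if a second vcpu is being started, IBS is
 * disabled on all vcpus instead.
 */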
4150 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4151 {
4152         int i, online_vcpus, started_vcpus = 0;
4153
4154         if (!is_vcpu_stopped(vcpu))
4155                 return;
4156
4157         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4158         /* Only one cpu at a time may enter/leave the STOPPED state. */
4159         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4160         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4161
4162         for (i = 0; i < online_vcpus; i++) {
4163                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4164                         started_vcpus++;
4165         }
4166
4167         if (started_vcpus == 0) {
4168                 /* we're the only active VCPU -> speed it up */
4169                 __enable_ibs_on_vcpu(vcpu);
4170         } else if (started_vcpus == 1) {
4171                 /*
4172                  * As we are starting a second VCPU, we have to disable
4173                  * the IBS facility on all VCPUs to remove potentially
4174                  * outstanding ENABLE requests.
4175                  */
4176                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4177         }
4178
4179         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4180         /*
4181          * Another VCPU might have used IBS while we were offline.
4182          * Let's play safe and flush the VCPU at startup.
4183          */
4184         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4185         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4186         return;
4187 }
4188
4189 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4190 {
4191         int i, online_vcpus, started_vcpus = 0;
4192         struct kvm_vcpu *started_vcpu = NULL;
4193
4194         if (is_vcpu_stopped(vcpu))
4195                 return;
4196
4197         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4198         /* Only one cpu at a time may enter/leave the STOPPED state. */
4199         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4200         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4201
4202         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4203         kvm_s390_clear_stop_irq(vcpu);
4204
4205         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4206         __disable_ibs_on_vcpu(vcpu);
4207
4208         for (i = 0; i < online_vcpus; i++) {
4209                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4210                         started_vcpus++;
4211                         started_vcpu = vcpu->kvm->vcpus[i];
4212                 }
4213         }
4214
4215         if (started_vcpus == 1) {
4216                 /*
4217                  * As we only have one VCPU left, we want to enable the
4218                  * IBS facility for that VCPU to speed it up.
4219                  */
4220                 __enable_ibs_on_vcpu(started_vcpu);
4221         }
4222
4223         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4224         return;
4225 }
4226
4227 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4228                                      struct kvm_enable_cap *cap)
4229 {
4230         int r;
4231
4232         if (cap->flags)
4233                 return -EINVAL;
4234
4235         switch (cap->cap) {
4236         case KVM_CAP_S390_CSS_SUPPORT:
4237                 if (!vcpu->kvm->arch.css_support) {
4238                         vcpu->kvm->arch.css_support = 1;
4239                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4240                         trace_kvm_s390_enable_css(vcpu->kvm);
4241                 }
4242                 r = 0;
4243                 break;
4244         default:
4245                 r = -EINVAL;
4246                 break;
4247         }
4248         return r;
4249 }
4250
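/*
 * Handler for the KVM_S390_MEM_OP vcpu ioctl: read or write guest logical
 * memory through a temporary kernel buffer, or merely check accessibility
 * when KVM_S390_MEMOP_F_CHECK_ONLY is set. On access errors a program
 * interrupt is injected if KVM_S390_MEMOP_F_INJECT_EXCEPTION was requested.
 */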
4251 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4252                                   struct kvm_s390_mem_op *mop)
4253 {
4254         void __user *uaddr = (void __user *)mop->buf;
4255         void *tmpbuf = NULL;
4256         int r, srcu_idx;
4257         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4258                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4259
4260         if (mop->flags & ~supported_flags)
4261                 return -EINVAL;
4262
4263         if (mop->size > MEM_OP_MAX_SIZE)
4264                 return -E2BIG;
4265
4266         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4267                 tmpbuf = vmalloc(mop->size);
4268                 if (!tmpbuf)
4269                         return -ENOMEM;
4270         }
4271
4272         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4273
4274         switch (mop->op) {
4275         case KVM_S390_MEMOP_LOGICAL_READ:
4276                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4277                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4278                                             mop->size, GACC_FETCH);
4279                         break;
4280                 }
4281                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4282                 if (r == 0) {
4283                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4284                                 r = -EFAULT;
4285                 }
4286                 break;
4287         case KVM_S390_MEMOP_LOGICAL_WRITE:
4288                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4289                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4290                                             mop->size, GACC_STORE);
4291                         break;
4292                 }
4293                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4294                         r = -EFAULT;
4295                         break;
4296                 }
4297                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4298                 break;
4299         default:
4300                 r = -EINVAL;
4301         }
4302
4303         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4304
4305         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4306                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4307
4308         vfree(tmpbuf);
4309         return r;
4310 }
4311
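/*
 * Interrupt injection (KVM_S390_IRQ and the older KVM_S390_INTERRUPT) is
 * handled on the asynchronous ioctl path, i.e. without the vcpu_load()/
 * vcpu_put() bracket used by kvm_arch_vcpu_ioctl() below.
 */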
4312 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4313                                unsigned int ioctl, unsigned long arg)
4314 {
4315         struct kvm_vcpu *vcpu = filp->private_data;
4316         void __user *argp = (void __user *)arg;
4317
4318         switch (ioctl) {
4319         case KVM_S390_IRQ: {
4320                 struct kvm_s390_irq s390irq;
4321
4322                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4323                         return -EFAULT;
4324                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4325         }
4326         case KVM_S390_INTERRUPT: {
4327                 struct kvm_s390_interrupt s390int;
4328                 struct kvm_s390_irq s390irq;
4329
4330                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4331                         return -EFAULT;
4332                 if (s390int_to_s390irq(&s390int, &s390irq))
4333                         return -EINVAL;
4334                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4335         }
4336         }
4337         return -ENOIOCTLCMD;
4338 }
4339
4340 long kvm_arch_vcpu_ioctl(struct file *filp,
4341                          unsigned int ioctl, unsigned long arg)
4342 {
4343         struct kvm_vcpu *vcpu = filp->private_data;
4344         void __user *argp = (void __user *)arg;
4345         int idx;
4346         long r;
4347
4348         vcpu_load(vcpu);
4349
4350         switch (ioctl) {
4351         case KVM_S390_STORE_STATUS:
4352                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4353                 r = kvm_s390_vcpu_store_status(vcpu, arg);
4354                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4355                 break;
4356         case KVM_S390_SET_INITIAL_PSW: {
4357                 psw_t psw;
4358
4359                 r = -EFAULT;
4360                 if (copy_from_user(&psw, argp, sizeof(psw)))
4361                         break;
4362                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4363                 break;
4364         }
4365         case KVM_S390_INITIAL_RESET:
4366                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4367                 break;
4368         case KVM_SET_ONE_REG:
4369         case KVM_GET_ONE_REG: {
4370                 struct kvm_one_reg reg;
4371                 r = -EFAULT;
4372                 if (copy_from_user(&reg, argp, sizeof(reg)))
4373                         break;
4374                 if (ioctl == KVM_SET_ONE_REG)
4375                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4376                 else
4377                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4378                 break;
4379         }
4380 #ifdef CONFIG_KVM_S390_UCONTROL
4381         case KVM_S390_UCAS_MAP: {
4382                 struct kvm_s390_ucas_mapping ucasmap;
4383
4384                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4385                         r = -EFAULT;
4386                         break;
4387                 }
4388
4389                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4390                         r = -EINVAL;
4391                         break;
4392                 }
4393
4394                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4395                                      ucasmap.vcpu_addr, ucasmap.length);
4396                 break;
4397         }
4398         case KVM_S390_UCAS_UNMAP: {
4399                 struct kvm_s390_ucas_mapping ucasmap;
4400
4401                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4402                         r = -EFAULT;
4403                         break;
4404                 }
4405
4406                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4407                         r = -EINVAL;
4408                         break;
4409                 }
4410
4411                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4412                         ucasmap.length);
4413                 break;
4414         }
4415 #endif
4416         case KVM_S390_VCPU_FAULT: {
4417                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4418                 break;
4419         }
4420         case KVM_ENABLE_CAP:
4421         {
4422                 struct kvm_enable_cap cap;
4423                 r = -EFAULT;
4424                 if (copy_from_user(&cap, argp, sizeof(cap)))
4425                         break;
4426                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4427                 break;
4428         }
4429         case KVM_S390_MEM_OP: {
4430                 struct kvm_s390_mem_op mem_op;
4431
4432                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4433                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4434                 else
4435                         r = -EFAULT;
4436                 break;
4437         }
4438         case KVM_S390_SET_IRQ_STATE: {
4439                 struct kvm_s390_irq_state irq_state;
4440
4441                 r = -EFAULT;
4442                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4443                         break;
4444                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4445                     irq_state.len == 0 ||
4446                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4447                         r = -EINVAL;
4448                         break;
4449                 }
4450                 /* do not use irq_state.flags, it will break old QEMUs */
4451                 r = kvm_s390_set_irq_state(vcpu,
4452                                            (void __user *) irq_state.buf,
4453                                            irq_state.len);
4454                 break;
4455         }
4456         case KVM_S390_GET_IRQ_STATE: {
4457                 struct kvm_s390_irq_state irq_state;
4458
4459                 r = -EFAULT;
4460                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4461                         break;
4462                 if (irq_state.len == 0) {
4463                         r = -EINVAL;
4464                         break;
4465                 }
4466                 /* do not use irq_state.flags, it will break old QEMUs */
4467                 r = kvm_s390_get_irq_state(vcpu,
4468                                            (__u8 __user *)  irq_state.buf,
4469                                            irq_state.len);
4470                 break;
4471         }
4472         default:
4473                 r = -ENOTTY;
4474         }
4475
4476         vcpu_put(vcpu);
4477         return r;
4478 }
4479
4480 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4481 {
4482 #ifdef CONFIG_KVM_S390_UCONTROL
4483         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4484                  && (kvm_is_ucontrol(vcpu->kvm))) {
4485                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4486                 get_page(vmf->page);
4487                 return 0;
4488         }
4489 #endif
4490         return VM_FAULT_SIGBUS;
4491 }
4492
4493 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4494                             unsigned long npages)
4495 {
4496         return 0;
4497 }
4498
4499 /* Section: memory related */
4500 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4501                                    struct kvm_memory_slot *memslot,
4502                                    const struct kvm_userspace_memory_region *mem,
4503                                    enum kvm_mr_change change)
4504 {
4505         /* A few sanity checks. Memory slots must start and end on a segment
4506            boundary (1 MB). The backing memory in userland may be fragmented
4507            across several vmas, and it is fine to mmap() and munmap() parts of
4508            this slot at any time after this call. */
4509
4510         if (mem->userspace_addr & 0xffffful)
4511                 return -EINVAL;
4512
4513         if (mem->memory_size & 0xffffful)
4514                 return -EINVAL;
4515
4516         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4517                 return -EINVAL;
4518
4519         return 0;
4520 }
4521
4522 void kvm_arch_commit_memory_region(struct kvm *kvm,
4523                                 const struct kvm_userspace_memory_region *mem,
4524                                 const struct kvm_memory_slot *old,
4525                                 const struct kvm_memory_slot *new,
4526                                 enum kvm_mr_change change)
4527 {
4528         int rc = 0;
4529
4530         switch (change) {
4531         case KVM_MR_DELETE:
4532                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4533                                         old->npages * PAGE_SIZE);
4534                 break;
4535         case KVM_MR_MOVE:
4536                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4537                                         old->npages * PAGE_SIZE);
4538                 if (rc)
4539                         break;
4540                 /* FALLTHROUGH */
4541         case KVM_MR_CREATE:
4542                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4543                                       mem->guest_phys_addr, mem->memory_size);
4544                 break;
4545         case KVM_MR_FLAGS_ONLY:
4546                 break;
4547         default:
4548                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
4549         }
4550         if (rc)
4551                 pr_warn("failed to commit memory region\n");
4552         return;
4553 }
4554
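/*
 * Build the mask of "non-hypervisor" facility bits for stfle doubleword i:
 * the corresponding two bit field of sclp.hmfai selects, in steps of 16
 * facility bits, how much of the doubleword is masked off.
 */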
4555 static inline unsigned long nonhyp_mask(int i)
4556 {
4557         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4558
4559         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4560 }
4561
4562 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4563 {
4564         vcpu->valid_wakeup = false;
4565 }
4566
4567 static int __init kvm_s390_init(void)
4568 {
4569         int i;
4570
4571         if (!sclp.has_sief2) {
4572                 pr_info("SIE is not available\n");
4573                 return -ENODEV;
4574         }
4575
4576         if (nested && hpage) {
4577                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
4578                 return -EINVAL;
4579         }
4580
4581         for (i = 0; i < 16; i++)
4582                 kvm_s390_fac_base[i] |=
4583                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4584
4585         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4586 }
4587
4588 static void __exit kvm_s390_exit(void)
4589 {
4590         kvm_exit();
4591 }
4592
4593 module_init(kvm_s390_init);
4594 module_exit(kvm_s390_exit);
4595
4596 /*
4597  * Enable autoloading of the kvm module.
4598  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4599  * since x86 takes a different approach.
4600  */
4601 #include <linux/miscdevice.h>
4602 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4603 MODULE_ALIAS("devname:kvm");