arch/s390/kvm/kvm-s390.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include "kvm-s390.h"
48 #include "gaccess.h"
49
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53
54 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57                            (KVM_MAX_VCPUS + LOCAL_IRQS))
58
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61
62 struct kvm_stats_debugfs_item debugfs_entries[] = {
63         { "userspace_handled", VCPU_STAT(exit_userspace) },
64         { "exit_null", VCPU_STAT(exit_null) },
65         { "exit_validity", VCPU_STAT(exit_validity) },
66         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
67         { "exit_external_request", VCPU_STAT(exit_external_request) },
68         { "exit_io_request", VCPU_STAT(exit_io_request) },
69         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70         { "exit_instruction", VCPU_STAT(exit_instruction) },
71         { "exit_pei", VCPU_STAT(exit_pei) },
72         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78         { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
79         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84         { "deliver_ckc", VCPU_STAT(deliver_ckc) },
85         { "deliver_cputm", VCPU_STAT(deliver_cputm) },
86         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
87         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
88         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
89         { "deliver_virtio", VCPU_STAT(deliver_virtio) },
90         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
91         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
92         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
93         { "deliver_program", VCPU_STAT(deliver_program) },
94         { "deliver_io", VCPU_STAT(deliver_io) },
95         { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
96         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
97         { "inject_ckc", VCPU_STAT(inject_ckc) },
98         { "inject_cputm", VCPU_STAT(inject_cputm) },
99         { "inject_external_call", VCPU_STAT(inject_external_call) },
100         { "inject_float_mchk", VM_STAT(inject_float_mchk) },
101         { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
102         { "inject_io", VM_STAT(inject_io) },
103         { "inject_mchk", VCPU_STAT(inject_mchk) },
104         { "inject_pfault_done", VM_STAT(inject_pfault_done) },
105         { "inject_program", VCPU_STAT(inject_program) },
106         { "inject_restart", VCPU_STAT(inject_restart) },
107         { "inject_service_signal", VM_STAT(inject_service_signal) },
108         { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
109         { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
110         { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
111         { "inject_virtio", VM_STAT(inject_virtio) },
112         { "instruction_epsw", VCPU_STAT(instruction_epsw) },
113         { "instruction_gs", VCPU_STAT(instruction_gs) },
114         { "instruction_io_other", VCPU_STAT(instruction_io_other) },
115         { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
116         { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
117         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
118         { "instruction_ptff", VCPU_STAT(instruction_ptff) },
119         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
120         { "instruction_sck", VCPU_STAT(instruction_sck) },
121         { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
122         { "instruction_spx", VCPU_STAT(instruction_spx) },
123         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
124         { "instruction_stap", VCPU_STAT(instruction_stap) },
125         { "instruction_iske", VCPU_STAT(instruction_iske) },
126         { "instruction_ri", VCPU_STAT(instruction_ri) },
127         { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
128         { "instruction_sske", VCPU_STAT(instruction_sske) },
129         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
130         { "instruction_essa", VCPU_STAT(instruction_essa) },
131         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
132         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
133         { "instruction_tb", VCPU_STAT(instruction_tb) },
134         { "instruction_tpi", VCPU_STAT(instruction_tpi) },
135         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
136         { "instruction_tsch", VCPU_STAT(instruction_tsch) },
137         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
138         { "instruction_sie", VCPU_STAT(instruction_sie) },
139         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
140         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
141         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
142         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
143         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
144         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
145         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
146         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
147         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
148         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
149         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
150         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
151         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
152         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
153         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
154         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
155         { "instruction_diag_10", VCPU_STAT(diagnose_10) },
156         { "instruction_diag_44", VCPU_STAT(diagnose_44) },
157         { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
158         { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) },
159         { "instruction_diag_258", VCPU_STAT(diagnose_258) },
160         { "instruction_diag_308", VCPU_STAT(diagnose_308) },
161         { "instruction_diag_500", VCPU_STAT(diagnose_500) },
162         { "instruction_diag_other", VCPU_STAT(diagnose_other) },
163         { NULL }
164 };
165
166 struct kvm_s390_tod_clock_ext {
167         __u8 epoch_idx;
168         __u64 tod;
169         __u8 reserved[7];
170 } __packed;
171
172 /* allow nested virtualization in KVM (if enabled by user space) */
173 static int nested;
174 module_param(nested, int, S_IRUGO);
175 MODULE_PARM_DESC(nested, "Nested virtualization support");
176
177 /* allow 1m huge page guest backing, if !nested */
178 static int hpage;
179 module_param(hpage, int, 0444);
180 MODULE_PARM_DESC(hpage, "1m huge page backing support");
181
182 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
183 static u8 halt_poll_max_steal = 10;
184 module_param(halt_poll_max_steal, byte, 0644);
185 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
186
187 /*
188  * For now we handle at most 16 double words as this is what the s390 base
189  * kernel handles and stores in the prefix page. Going beyond this would
190  * require code changes, but the external uapi can stay the same.
191  */
192 #define SIZE_INTERNAL 16
193
194 /*
195  * Base feature mask that defines the default facilities. Consists of the
196  * defines in FACILITIES_KVM and the non-hypervisor-managed bits.
197  */
198 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
199 /*
200  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
201  * and defines the facilities that can be enabled via a cpu model.
202  */
203 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
204
205 static unsigned long kvm_s390_fac_size(void)
206 {
207         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
208         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
209         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
210                 sizeof(S390_lowcore.stfle_fac_list));
211
212         return SIZE_INTERNAL;
213 }
214
215 /* available cpu features supported by kvm */
216 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
217 /* available subfunctions indicated via query / "test bit" */
218 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
219
220 static struct gmap_notifier gmap_notifier;
221 static struct gmap_notifier vsie_gmap_notifier;
222 debug_info_t *kvm_s390_dbf;
223
224 /* Section: not file related */
225 int kvm_arch_hardware_enable(void)
226 {
227         /* every s390 is virtualization enabled ;-) */
228         return 0;
229 }
230
231 int kvm_arch_check_processor_compat(void)
232 {
233         return 0;
234 }
235
236 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
237                               unsigned long end);
238
239 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
240 {
241         u8 delta_idx = 0;
242
243         /*
244          * The TOD jumps by delta; we have to compensate for this by adding
245          * -delta to the epoch.
246          */
247         delta = -delta;
248
249         /* sign-extension - we're adding to signed values below */
250         if ((s64)delta < 0)
251                 delta_idx = -1;
252
253         scb->epoch += delta;
254         if (scb->ecd & ECD_MEF) {
255                 scb->epdx += delta_idx;
256                 if (scb->epoch < delta)
257                         scb->epdx += 1;
258         }
259 }
260
261 /*
262  * This callback is executed during stop_machine(). All CPUs are therefore
263  * temporarily stopped. In order not to change guest behavior, we have to
264  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
265  * so a CPU won't be stopped while calculating with the epoch.
266  */
267 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
268                           void *v)
269 {
270         struct kvm *kvm;
271         struct kvm_vcpu *vcpu;
272         int i;
273         unsigned long long *delta = v;
274
275         list_for_each_entry(kvm, &vm_list, vm_list) {
276                 kvm_for_each_vcpu(i, vcpu, kvm) {
277                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
278                         if (i == 0) {
279                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
280                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
281                         }
282                         if (vcpu->arch.cputm_enabled)
283                                 vcpu->arch.cputm_start += *delta;
284                         if (vcpu->arch.vsie_block)
285                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
286                                                    *delta);
287                 }
288         }
289         return NOTIFY_OK;
290 }
291
292 static struct notifier_block kvm_clock_notifier = {
293         .notifier_call = kvm_clock_sync,
294 };
295
296 int kvm_arch_hardware_setup(void)
297 {
298         gmap_notifier.notifier_call = kvm_gmap_notifier;
299         gmap_register_pte_notifier(&gmap_notifier);
300         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
301         gmap_register_pte_notifier(&vsie_gmap_notifier);
302         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
303                                        &kvm_clock_notifier);
304         return 0;
305 }
306
307 void kvm_arch_hardware_unsetup(void)
308 {
309         gmap_unregister_pte_notifier(&gmap_notifier);
310         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
311         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
312                                          &kvm_clock_notifier);
313 }
314
315 static void allow_cpu_feat(unsigned long nr)
316 {
317         set_bit_inv(nr, kvm_s390_available_cpu_feat);
318 }
319
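/*
 * Probe a single PERFORM LOCKED OPERATION function code. Setting bit 0x100
 * in r0 selects the "test bit" form, which only sets the condition code;
 * cc 0 means the function is available.
 */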
320 static inline int plo_test_bit(unsigned char nr)
321 {
322         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
323         int cc;
324
325         asm volatile(
326                 /* Parameter registers are ignored for "test bit" */
327                 "       plo     0,0,0,0(0)\n"
328                 "       ipm     %0\n"
329                 "       srl     %0,28\n"
330                 : "=d" (cc)
331                 : "d" (r0)
332                 : "cc");
333         return cc == 0;
334 }
335
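/*
 * Run the query function (function code 0 in r0) of the instruction given
 * by 'opcode' and store the returned subfunction bit mask at 'query'.
 * Used below for SORTL and DFLTCC.
 */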
336 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
337 {
338         register unsigned long r0 asm("0") = 0; /* query function */
339         register unsigned long r1 asm("1") = (unsigned long) query;
340
341         asm volatile(
342                 /* Parameter registers are ignored */
343                 "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
344                 :
345                 : "d" (r0), "a" (r1), [opc] "i" (opcode)
346                 : "cc", "memory");
347 }
348
349 #define INSN_SORTL 0xb938
350 #define INSN_DFLTCC 0xb939
351
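/*
 * Determine which PLO, PTFF and CPACF subfunctions the host provides and
 * which SIE features can be passed through to guests (nested virtualization
 * permitting).
 */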
352 static void kvm_s390_cpu_feat_init(void)
353 {
354         int i;
355
356         for (i = 0; i < 256; ++i) {
357                 if (plo_test_bit(i))
358                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
359         }
360
361         if (test_facility(28)) /* TOD-clock steering */
362                 ptff(kvm_s390_available_subfunc.ptff,
363                      sizeof(kvm_s390_available_subfunc.ptff),
364                      PTFF_QAF);
365
366         if (test_facility(17)) { /* MSA */
367                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
368                               kvm_s390_available_subfunc.kmac);
369                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
370                               kvm_s390_available_subfunc.kmc);
371                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
372                               kvm_s390_available_subfunc.km);
373                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
374                               kvm_s390_available_subfunc.kimd);
375                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
376                               kvm_s390_available_subfunc.klmd);
377         }
378         if (test_facility(76)) /* MSA3 */
379                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
380                               kvm_s390_available_subfunc.pckmo);
381         if (test_facility(77)) { /* MSA4 */
382                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
383                               kvm_s390_available_subfunc.kmctr);
384                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
385                               kvm_s390_available_subfunc.kmf);
386                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
387                               kvm_s390_available_subfunc.kmo);
388                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
389                               kvm_s390_available_subfunc.pcc);
390         }
391         if (test_facility(57)) /* MSA5 */
392                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
393                               kvm_s390_available_subfunc.ppno);
394
395         if (test_facility(146)) /* MSA8 */
396                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
397                               kvm_s390_available_subfunc.kma);
398
399         if (test_facility(155)) /* MSA9 */
400                 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
401                               kvm_s390_available_subfunc.kdsa);
402
403         if (test_facility(150)) /* SORTL */
404                 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
405
406         if (test_facility(151)) /* DFLTCC */
407                 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
408
409         if (MACHINE_HAS_ESOP)
410                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
411         /*
412          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
413          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
414          */
415         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
416             !test_facility(3) || !nested)
417                 return;
418         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
419         if (sclp.has_64bscao)
420                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
421         if (sclp.has_siif)
422                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
423         if (sclp.has_gpere)
424                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
425         if (sclp.has_gsls)
426                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
427         if (sclp.has_ib)
428                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
429         if (sclp.has_cei)
430                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
431         if (sclp.has_ibs)
432                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
433         if (sclp.has_kss)
434                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
435         /*
436          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
437          * all skey handling functions read/set the skey from the PGSTE
438          * instead of the real storage key.
439          *
440          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
441          * pages to be detected as preserved although they are resident.
442          *
443          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
444          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
445          *
446          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
447          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
448          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
449          *
450          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
451          * cannot easily shadow the SCA because of the ipte lock.
452          */
453 }
454
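/*
 * Module initialization: set up the s390 debug feature for tracing, probe
 * host CPU features, register the FLIC device ops and initialize the GIB.
 */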
455 int kvm_arch_init(void *opaque)
456 {
457         int rc = -ENOMEM;
458
459         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
460         if (!kvm_s390_dbf)
461                 return -ENOMEM;
462
463         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view))
464                 goto out;
465
466         kvm_s390_cpu_feat_init();
467
468         /* Register floating interrupt controller interface. */
469         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
470         if (rc) {
471                 pr_err("A FLIC registration call failed with rc=%d\n", rc);
472                 goto out;
473         }
474
475         rc = kvm_s390_gib_init(GAL_ISC);
476         if (rc)
477                 goto out;
478
479         return 0;
480
481 out:
482         kvm_arch_exit();
483         return rc;
484 }
485
486 void kvm_arch_exit(void)
487 {
488         kvm_s390_gib_destroy();
489         debug_unregister(kvm_s390_dbf);
490 }
491
492 /* Section: device related */
493 long kvm_arch_dev_ioctl(struct file *filp,
494                         unsigned int ioctl, unsigned long arg)
495 {
496         if (ioctl == KVM_S390_ENABLE_SIE)
497                 return s390_enable_sie();
498         return -EINVAL;
499 }
500
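/* Report which KVM capabilities this host (and VM type) supports. */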
501 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
502 {
503         int r;
504
505         switch (ext) {
506         case KVM_CAP_S390_PSW:
507         case KVM_CAP_S390_GMAP:
508         case KVM_CAP_SYNC_MMU:
509 #ifdef CONFIG_KVM_S390_UCONTROL
510         case KVM_CAP_S390_UCONTROL:
511 #endif
512         case KVM_CAP_ASYNC_PF:
513         case KVM_CAP_SYNC_REGS:
514         case KVM_CAP_ONE_REG:
515         case KVM_CAP_ENABLE_CAP:
516         case KVM_CAP_S390_CSS_SUPPORT:
517         case KVM_CAP_IOEVENTFD:
518         case KVM_CAP_DEVICE_CTRL:
519         case KVM_CAP_S390_IRQCHIP:
520         case KVM_CAP_VM_ATTRIBUTES:
521         case KVM_CAP_MP_STATE:
522         case KVM_CAP_IMMEDIATE_EXIT:
523         case KVM_CAP_S390_INJECT_IRQ:
524         case KVM_CAP_S390_USER_SIGP:
525         case KVM_CAP_S390_USER_STSI:
526         case KVM_CAP_S390_SKEYS:
527         case KVM_CAP_S390_IRQ_STATE:
528         case KVM_CAP_S390_USER_INSTR0:
529         case KVM_CAP_S390_CMMA_MIGRATION:
530         case KVM_CAP_S390_AIS:
531         case KVM_CAP_S390_AIS_MIGRATION:
532                 r = 1;
533                 break;
534         case KVM_CAP_S390_HPAGE_1M:
535                 r = 0;
536                 if (hpage && !kvm_is_ucontrol(kvm))
537                         r = 1;
538                 break;
539         case KVM_CAP_S390_MEM_OP:
540                 r = MEM_OP_MAX_SIZE;
541                 break;
542         case KVM_CAP_NR_VCPUS:
543         case KVM_CAP_MAX_VCPUS:
544         case KVM_CAP_MAX_VCPU_ID:
545                 r = KVM_S390_BSCA_CPU_SLOTS;
546                 if (!kvm_s390_use_sca_entries())
547                         r = KVM_MAX_VCPUS;
548                 else if (sclp.has_esca && sclp.has_64bscao)
549                         r = KVM_S390_ESCA_CPU_SLOTS;
550                 break;
551         case KVM_CAP_S390_COW:
552                 r = MACHINE_HAS_ESOP;
553                 break;
554         case KVM_CAP_S390_VECTOR_REGISTERS:
555                 r = MACHINE_HAS_VX;
556                 break;
557         case KVM_CAP_S390_RI:
558                 r = test_facility(64);
559                 break;
560         case KVM_CAP_S390_GS:
561                 r = test_facility(133);
562                 break;
563         case KVM_CAP_S390_BPB:
564                 r = test_facility(82);
565                 break;
566         default:
567                 r = 0;
568         }
569         return r;
570 }
571
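/*
 * Walk the memslot in steps of one segment (_PAGE_ENTRIES pages), fetch the
 * dirty bits from the gmap and transfer them to the KVM dirty bitmap.
 */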
572 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
573                                     struct kvm_memory_slot *memslot)
574 {
575         int i;
576         gfn_t cur_gfn, last_gfn;
577         unsigned long gaddr, vmaddr;
578         struct gmap *gmap = kvm->arch.gmap;
579         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
580
581         /* Loop over all guest segments */
582         cur_gfn = memslot->base_gfn;
583         last_gfn = memslot->base_gfn + memslot->npages;
584         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
585                 gaddr = gfn_to_gpa(cur_gfn);
586                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
587                 if (kvm_is_error_hva(vmaddr))
588                         continue;
589
590                 bitmap_zero(bitmap, _PAGE_ENTRIES);
591                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
592                 for (i = 0; i < _PAGE_ENTRIES; i++) {
593                         if (test_bit(i, bitmap))
594                                 mark_page_dirty(kvm, cur_gfn + i);
595                 }
596
597                 if (fatal_signal_pending(current))
598                         return;
599                 cond_resched();
600         }
601 }
602
603 /* Section: vm related */
604 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
605
606 /*
607  * Get (and clear) the dirty memory log for a memory slot.
608  */
609 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
610                                struct kvm_dirty_log *log)
611 {
612         int r;
613         unsigned long n;
614         struct kvm_memslots *slots;
615         struct kvm_memory_slot *memslot;
616         int is_dirty = 0;
617
618         if (kvm_is_ucontrol(kvm))
619                 return -EINVAL;
620
621         mutex_lock(&kvm->slots_lock);
622
623         r = -EINVAL;
624         if (log->slot >= KVM_USER_MEM_SLOTS)
625                 goto out;
626
627         slots = kvm_memslots(kvm);
628         memslot = id_to_memslot(slots, log->slot);
629         r = -ENOENT;
630         if (!memslot->dirty_bitmap)
631                 goto out;
632
633         kvm_s390_sync_dirty_log(kvm, memslot);
634         r = kvm_get_dirty_log(kvm, log, &is_dirty);
635         if (r)
636                 goto out;
637
638         /* Clear the dirty log */
639         if (is_dirty) {
640                 n = kvm_dirty_bitmap_bytes(memslot);
641                 memset(memslot->dirty_bitmap, 0, n);
642         }
643         r = 0;
644 out:
645         mutex_unlock(&kvm->slots_lock);
646         return r;
647 }
648
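/* Request an operation-exception intercept on every vcpu of the VM. */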
649 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
650 {
651         unsigned int i;
652         struct kvm_vcpu *vcpu;
653
654         kvm_for_each_vcpu(i, vcpu, kvm) {
655                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
656         }
657 }
658
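/* Enable optional VM capabilities requested via KVM_ENABLE_CAP. */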
659 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
660 {
661         int r;
662
663         if (cap->flags)
664                 return -EINVAL;
665
666         switch (cap->cap) {
667         case KVM_CAP_S390_IRQCHIP:
668                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
669                 kvm->arch.use_irqchip = 1;
670                 r = 0;
671                 break;
672         case KVM_CAP_S390_USER_SIGP:
673                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
674                 kvm->arch.user_sigp = 1;
675                 r = 0;
676                 break;
677         case KVM_CAP_S390_VECTOR_REGISTERS:
678                 mutex_lock(&kvm->lock);
679                 if (kvm->created_vcpus) {
680                         r = -EBUSY;
681                 } else if (MACHINE_HAS_VX) {
682                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
683                         set_kvm_facility(kvm->arch.model.fac_list, 129);
684                         if (test_facility(134)) {
685                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
686                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
687                         }
688                         if (test_facility(135)) {
689                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
690                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
691                         }
692                         if (test_facility(148)) {
693                                 set_kvm_facility(kvm->arch.model.fac_mask, 148);
694                                 set_kvm_facility(kvm->arch.model.fac_list, 148);
695                         }
696                         if (test_facility(152)) {
697                                 set_kvm_facility(kvm->arch.model.fac_mask, 152);
698                                 set_kvm_facility(kvm->arch.model.fac_list, 152);
699                         }
700                         r = 0;
701                 } else
702                         r = -EINVAL;
703                 mutex_unlock(&kvm->lock);
704                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
705                          r ? "(not available)" : "(success)");
706                 break;
707         case KVM_CAP_S390_RI:
708                 r = -EINVAL;
709                 mutex_lock(&kvm->lock);
710                 if (kvm->created_vcpus) {
711                         r = -EBUSY;
712                 } else if (test_facility(64)) {
713                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
714                         set_kvm_facility(kvm->arch.model.fac_list, 64);
715                         r = 0;
716                 }
717                 mutex_unlock(&kvm->lock);
718                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
719                          r ? "(not available)" : "(success)");
720                 break;
721         case KVM_CAP_S390_AIS:
722                 mutex_lock(&kvm->lock);
723                 if (kvm->created_vcpus) {
724                         r = -EBUSY;
725                 } else {
726                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
727                         set_kvm_facility(kvm->arch.model.fac_list, 72);
728                         r = 0;
729                 }
730                 mutex_unlock(&kvm->lock);
731                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
732                          r ? "(not available)" : "(success)");
733                 break;
734         case KVM_CAP_S390_GS:
735                 r = -EINVAL;
736                 mutex_lock(&kvm->lock);
737                 if (kvm->created_vcpus) {
738                         r = -EBUSY;
739                 } else if (test_facility(133)) {
740                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
741                         set_kvm_facility(kvm->arch.model.fac_list, 133);
742                         r = 0;
743                 }
744                 mutex_unlock(&kvm->lock);
745                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
746                          r ? "(not available)" : "(success)");
747                 break;
748         case KVM_CAP_S390_HPAGE_1M:
749                 mutex_lock(&kvm->lock);
750                 if (kvm->created_vcpus)
751                         r = -EBUSY;
752                 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
753                         r = -EINVAL;
754                 else {
755                         r = 0;
756                         down_write(&kvm->mm->mmap_sem);
757                         kvm->mm->context.allow_gmap_hpage_1m = 1;
758                         up_write(&kvm->mm->mmap_sem);
759                         /*
760                          * We might have to create fake 4k page
761                          * tables. To keep the hardware from working on
762                          * stale PGSTEs, we emulate these instructions.
763                          */
764                         kvm->arch.use_skf = 0;
765                         kvm->arch.use_pfmfi = 0;
766                 }
767                 mutex_unlock(&kvm->lock);
768                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
769                          r ? "(not available)" : "(success)");
770                 break;
771         case KVM_CAP_S390_USER_STSI:
772                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
773                 kvm->arch.user_stsi = 1;
774                 r = 0;
775                 break;
776         case KVM_CAP_S390_USER_INSTR0:
777                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
778                 kvm->arch.user_instr0 = 1;
779                 icpt_operexc_on_all_vcpus(kvm);
780                 r = 0;
781                 break;
782         default:
783                 r = -EINVAL;
784                 break;
785         }
786         return r;
787 }
788
789 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
790 {
791         int ret;
792
793         switch (attr->attr) {
794         case KVM_S390_VM_MEM_LIMIT_SIZE:
795                 ret = 0;
796                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
797                          kvm->arch.mem_limit);
798                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
799                         ret = -EFAULT;
800                 break;
801         default:
802                 ret = -ENXIO;
803                 break;
804         }
805         return ret;
806 }
807
808 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
809 {
810         int ret;
811         unsigned int idx;
812         switch (attr->attr) {
813         case KVM_S390_VM_MEM_ENABLE_CMMA:
814                 ret = -ENXIO;
815                 if (!sclp.has_cmma)
816                         break;
817
818                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
819                 mutex_lock(&kvm->lock);
820                 if (kvm->created_vcpus)
821                         ret = -EBUSY;
822                 else if (kvm->mm->context.allow_gmap_hpage_1m)
823                         ret = -EINVAL;
824                 else {
825                         kvm->arch.use_cmma = 1;
826                         /* Not compatible with cmma. */
827                         kvm->arch.use_pfmfi = 0;
828                         ret = 0;
829                 }
830                 mutex_unlock(&kvm->lock);
831                 break;
832         case KVM_S390_VM_MEM_CLR_CMMA:
833                 ret = -ENXIO;
834                 if (!sclp.has_cmma)
835                         break;
836                 ret = -EINVAL;
837                 if (!kvm->arch.use_cmma)
838                         break;
839
840                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
841                 mutex_lock(&kvm->lock);
842                 idx = srcu_read_lock(&kvm->srcu);
843                 s390_reset_cmma(kvm->arch.gmap->mm);
844                 srcu_read_unlock(&kvm->srcu, idx);
845                 mutex_unlock(&kvm->lock);
846                 ret = 0;
847                 break;
848         case KVM_S390_VM_MEM_LIMIT_SIZE: {
849                 unsigned long new_limit;
850
851                 if (kvm_is_ucontrol(kvm))
852                         return -EINVAL;
853
854                 if (get_user(new_limit, (u64 __user *)attr->addr))
855                         return -EFAULT;
856
857                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
858                     new_limit > kvm->arch.mem_limit)
859                         return -E2BIG;
860
861                 if (!new_limit)
862                         return -EINVAL;
863
864                 /* gmap_create takes last usable address */
865                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
866                         new_limit -= 1;
867
868                 ret = -EBUSY;
869                 mutex_lock(&kvm->lock);
870                 if (!kvm->created_vcpus) {
871                         /* gmap_create will round the limit up */
872                         struct gmap *new = gmap_create(current->mm, new_limit);
873
874                         if (!new) {
875                                 ret = -ENOMEM;
876                         } else {
877                                 gmap_remove(kvm->arch.gmap);
878                                 new->private = kvm;
879                                 kvm->arch.gmap = new;
880                                 ret = 0;
881                         }
882                 }
883                 mutex_unlock(&kvm->lock);
884                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
885                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
886                          (void *) kvm->arch.gmap->asce);
887                 break;
888         }
889         default:
890                 ret = -ENXIO;
891                 break;
892         }
893         return ret;
894 }
895
896 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
897
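/*
 * Block all vcpus, reload their crypto setup and kick them out of the VSIE
 * handler so that the shadow crycb is recreated with the new settings.
 */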
898 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
899 {
900         struct kvm_vcpu *vcpu;
901         int i;
902
903         kvm_s390_vcpu_block_all(kvm);
904
905         kvm_for_each_vcpu(i, vcpu, kvm) {
906                 kvm_s390_vcpu_crypto_setup(vcpu);
907                 /* recreate the shadow crycb by leaving the VSIE handler */
908                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
909         }
910
911         kvm_s390_vcpu_unblock_all(kvm);
912 }
913
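/*
 * Handle the KVM_S390_VM_CRYPTO attribute group: toggle AES/DEA key
 * wrapping and AP instruction interpretation (apie) for the VM.
 */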
914 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
915 {
916         mutex_lock(&kvm->lock);
917         switch (attr->attr) {
918         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
919                 if (!test_kvm_facility(kvm, 76)) {
920                         mutex_unlock(&kvm->lock);
921                         return -EINVAL;
922                 }
923                 get_random_bytes(
924                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
925                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
926                 kvm->arch.crypto.aes_kw = 1;
927                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
928                 break;
929         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
930                 if (!test_kvm_facility(kvm, 76)) {
931                         mutex_unlock(&kvm->lock);
932                         return -EINVAL;
933                 }
934                 get_random_bytes(
935                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
936                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
937                 kvm->arch.crypto.dea_kw = 1;
938                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
939                 break;
940         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
941                 if (!test_kvm_facility(kvm, 76)) {
942                         mutex_unlock(&kvm->lock);
943                         return -EINVAL;
944                 }
945                 kvm->arch.crypto.aes_kw = 0;
946                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
947                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
948                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
949                 break;
950         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
951                 if (!test_kvm_facility(kvm, 76)) {
952                         mutex_unlock(&kvm->lock);
953                         return -EINVAL;
954                 }
955                 kvm->arch.crypto.dea_kw = 0;
956                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
957                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
958                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
959                 break;
960         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
961                 if (!ap_instructions_available()) {
962                         mutex_unlock(&kvm->lock);
963                         return -EOPNOTSUPP;
964                 }
965                 kvm->arch.crypto.apie = 1;
966                 break;
967         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
968                 if (!ap_instructions_available()) {
969                         mutex_unlock(&kvm->lock);
970                         return -EOPNOTSUPP;
971                 }
972                 kvm->arch.crypto.apie = 0;
973                 break;
974         default:
975                 mutex_unlock(&kvm->lock);
976                 return -ENXIO;
977         }
978
979         kvm_s390_vcpu_crypto_reset_all(kvm);
980         mutex_unlock(&kvm->lock);
981         return 0;
982 }
983
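/* Post a synchronous request to every vcpu of the VM. */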
984 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
985 {
986         int cx;
987         struct kvm_vcpu *vcpu;
988
989         kvm_for_each_vcpu(cx, vcpu, kvm)
990                 kvm_s390_sync_request(req, vcpu);
991 }
992
993 /*
994  * Must be called with kvm->srcu held to avoid races on memslots, and with
995  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
996  */
997 static int kvm_s390_vm_start_migration(struct kvm *kvm)
998 {
999         struct kvm_memory_slot *ms;
1000         struct kvm_memslots *slots;
1001         unsigned long ram_pages = 0;
1002         int slotnr;
1003
1004         /* migration mode already enabled */
1005         if (kvm->arch.migration_mode)
1006                 return 0;
1007         slots = kvm_memslots(kvm);
1008         if (!slots || !slots->used_slots)
1009                 return -EINVAL;
1010
1011         if (!kvm->arch.use_cmma) {
1012                 kvm->arch.migration_mode = 1;
1013                 return 0;
1014         }
1015         /* mark all the pages in active slots as dirty */
1016         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1017                 ms = slots->memslots + slotnr;
1018                 if (!ms->dirty_bitmap)
1019                         return -EINVAL;
1020                 /*
1021                  * The second half of the bitmap is only used on x86,
1022                  * and would be wasted otherwise, so we put it to good
1023                  * use here to keep track of the state of the storage
1024                  * attributes.
1025                  */
1026                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1027                 ram_pages += ms->npages;
1028         }
1029         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1030         kvm->arch.migration_mode = 1;
1031         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1032         return 0;
1033 }
1034
1035 /*
1036  * Must be called with kvm->slots_lock to avoid races with ourselves and
1037  * kvm_s390_vm_start_migration.
1038  */
1039 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1040 {
1041         /* migration mode already disabled */
1042         if (!kvm->arch.migration_mode)
1043                 return 0;
1044         kvm->arch.migration_mode = 0;
1045         if (kvm->arch.use_cmma)
1046                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1047         return 0;
1048 }
1049
1050 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1051                                      struct kvm_device_attr *attr)
1052 {
1053         int res = -ENXIO;
1054
1055         mutex_lock(&kvm->slots_lock);
1056         switch (attr->attr) {
1057         case KVM_S390_VM_MIGRATION_START:
1058                 res = kvm_s390_vm_start_migration(kvm);
1059                 break;
1060         case KVM_S390_VM_MIGRATION_STOP:
1061                 res = kvm_s390_vm_stop_migration(kvm);
1062                 break;
1063         default:
1064                 break;
1065         }
1066         mutex_unlock(&kvm->slots_lock);
1067
1068         return res;
1069 }
1070
1071 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1072                                      struct kvm_device_attr *attr)
1073 {
1074         u64 mig = kvm->arch.migration_mode;
1075
1076         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1077                 return -ENXIO;
1078
1079         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1080                 return -EFAULT;
1081         return 0;
1082 }
1083
1084 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1085 {
1086         struct kvm_s390_vm_tod_clock gtod;
1087
1088         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1089                 return -EFAULT;
1090
1091         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1092                 return -EINVAL;
1093         kvm_s390_set_tod_clock(kvm, &gtod);
1094
1095         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1096                 gtod.epoch_idx, gtod.tod);
1097
1098         return 0;
1099 }
1100
1101 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1102 {
1103         u8 gtod_high;
1104
1105         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1106                                            sizeof(gtod_high)))
1107                 return -EFAULT;
1108
1109         if (gtod_high != 0)
1110                 return -EINVAL;
1111         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1112
1113         return 0;
1114 }
1115
1116 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1117 {
1118         struct kvm_s390_vm_tod_clock gtod = { 0 };
1119
1120         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1121                            sizeof(gtod.tod)))
1122                 return -EFAULT;
1123
1124         kvm_s390_set_tod_clock(kvm, &gtod);
1125         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1126         return 0;
1127 }
1128
1129 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1130 {
1131         int ret;
1132
1133         if (attr->flags)
1134                 return -EINVAL;
1135
1136         switch (attr->attr) {
1137         case KVM_S390_VM_TOD_EXT:
1138                 ret = kvm_s390_set_tod_ext(kvm, attr);
1139                 break;
1140         case KVM_S390_VM_TOD_HIGH:
1141                 ret = kvm_s390_set_tod_high(kvm, attr);
1142                 break;
1143         case KVM_S390_VM_TOD_LOW:
1144                 ret = kvm_s390_set_tod_low(kvm, attr);
1145                 break;
1146         default:
1147                 ret = -ENXIO;
1148                 break;
1149         }
1150         return ret;
1151 }
1152
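/*
 * Read the host TOD clock and convert it to the guest view by adding the
 * guest epoch (and, with the multiple-epoch facility, the epoch index).
 */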
1153 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1154                                    struct kvm_s390_vm_tod_clock *gtod)
1155 {
1156         struct kvm_s390_tod_clock_ext htod;
1157
1158         preempt_disable();
1159
1160         get_tod_clock_ext((char *)&htod);
1161
1162         gtod->tod = htod.tod + kvm->arch.epoch;
1163         gtod->epoch_idx = 0;
1164         if (test_kvm_facility(kvm, 139)) {
1165                 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1166                 if (gtod->tod < htod.tod)
1167                         gtod->epoch_idx += 1;
1168         }
1169
1170         preempt_enable();
1171 }
1172
1173 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1174 {
1175         struct kvm_s390_vm_tod_clock gtod;
1176
1177         memset(&gtod, 0, sizeof(gtod));
1178         kvm_s390_get_tod_clock(kvm, &gtod);
1179         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1180                 return -EFAULT;
1181
1182         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1183                 gtod.epoch_idx, gtod.tod);
1184         return 0;
1185 }
1186
1187 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1188 {
1189         u8 gtod_high = 0;
1190
1191         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1192                                          sizeof(gtod_high)))
1193                 return -EFAULT;
1194         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1195
1196         return 0;
1197 }
1198
1199 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1200 {
1201         u64 gtod;
1202
1203         gtod = kvm_s390_get_tod_clock_fast(kvm);
1204         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1205                 return -EFAULT;
1206         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1207
1208         return 0;
1209 }
1210
1211 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1212 {
1213         int ret;
1214
1215         if (attr->flags)
1216                 return -EINVAL;
1217
1218         switch (attr->attr) {
1219         case KVM_S390_VM_TOD_EXT:
1220                 ret = kvm_s390_get_tod_ext(kvm, attr);
1221                 break;
1222         case KVM_S390_VM_TOD_HIGH:
1223                 ret = kvm_s390_get_tod_high(kvm, attr);
1224                 break;
1225         case KVM_S390_VM_TOD_LOW:
1226                 ret = kvm_s390_get_tod_low(kvm, attr);
1227                 break;
1228         default:
1229                 ret = -ENXIO;
1230                 break;
1231         }
1232         return ret;
1233 }
1234
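/*
 * Set the guest CPU model (cpuid, IBC and facility list) from user space.
 * The requested IBC value is clamped to the range supported by the host.
 */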
1235 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1236 {
1237         struct kvm_s390_vm_cpu_processor *proc;
1238         u16 lowest_ibc, unblocked_ibc;
1239         int ret = 0;
1240
1241         mutex_lock(&kvm->lock);
1242         if (kvm->created_vcpus) {
1243                 ret = -EBUSY;
1244                 goto out;
1245         }
1246         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1247         if (!proc) {
1248                 ret = -ENOMEM;
1249                 goto out;
1250         }
1251         if (!copy_from_user(proc, (void __user *)attr->addr,
1252                             sizeof(*proc))) {
1253                 kvm->arch.model.cpuid = proc->cpuid;
1254                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1255                 unblocked_ibc = sclp.ibc & 0xfff;
1256                 if (lowest_ibc && proc->ibc) {
1257                         if (proc->ibc > unblocked_ibc)
1258                                 kvm->arch.model.ibc = unblocked_ibc;
1259                         else if (proc->ibc < lowest_ibc)
1260                                 kvm->arch.model.ibc = lowest_ibc;
1261                         else
1262                                 kvm->arch.model.ibc = proc->ibc;
1263                 }
1264                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1265                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1266                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1267                          kvm->arch.model.ibc,
1268                          kvm->arch.model.cpuid);
1269                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1270                          kvm->arch.model.fac_list[0],
1271                          kvm->arch.model.fac_list[1],
1272                          kvm->arch.model.fac_list[2]);
1273         } else
1274                 ret = -EFAULT;
1275         kfree(proc);
1276 out:
1277         mutex_unlock(&kvm->lock);
1278         return ret;
1279 }
1280
1281 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1282                                        struct kvm_device_attr *attr)
1283 {
1284         struct kvm_s390_vm_cpu_feat data;
1285
1286         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1287                 return -EFAULT;
1288         if (!bitmap_subset((unsigned long *) data.feat,
1289                            kvm_s390_available_cpu_feat,
1290                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1291                 return -EINVAL;
1292
1293         mutex_lock(&kvm->lock);
1294         if (kvm->created_vcpus) {
1295                 mutex_unlock(&kvm->lock);
1296                 return -EBUSY;
1297         }
1298         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1299                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1300         mutex_unlock(&kvm->lock);
1301         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1302                          data.feat[0],
1303                          data.feat[1],
1304                          data.feat[2]);
1305         return 0;
1306 }
1307
1308 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1309                                           struct kvm_device_attr *attr)
1310 {
1311         mutex_lock(&kvm->lock);
1312         if (kvm->created_vcpus) {
1313                 mutex_unlock(&kvm->lock);
1314                 return -EBUSY;
1315         }
1316
1317         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1318                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1319                 mutex_unlock(&kvm->lock);
1320                 return -EFAULT;
1321         }
1322         mutex_unlock(&kvm->lock);
1323
1324         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1325                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1326                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1327                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1328                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1329         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1330                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1331                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1332         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1333                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1334                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1335         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1336                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1337                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1338         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1339                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1340                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1341         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1342                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1343                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1344         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1345                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1346                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1347         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1348                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1349                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1350         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1351                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1352                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1353         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1354                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1355                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1356         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1357                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1358                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1359         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1360                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1361                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1362         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1363                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1364                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1365         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1366                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1367                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1368         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1369                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1370                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1371         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1372                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1373                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1374                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1375                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1376         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1377                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1378                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1379                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1380                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1381
1382         return 0;
1383 }
1384
1385 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1386 {
1387         int ret = -ENXIO;
1388
1389         switch (attr->attr) {
1390         case KVM_S390_VM_CPU_PROCESSOR:
1391                 ret = kvm_s390_set_processor(kvm, attr);
1392                 break;
1393         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1394                 ret = kvm_s390_set_processor_feat(kvm, attr);
1395                 break;
1396         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1397                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1398                 break;
1399         }
1400         return ret;
1401 }
1402
1403 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1404 {
1405         struct kvm_s390_vm_cpu_processor *proc;
1406         int ret = 0;
1407
1408         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1409         if (!proc) {
1410                 ret = -ENOMEM;
1411                 goto out;
1412         }
1413         proc->cpuid = kvm->arch.model.cpuid;
1414         proc->ibc = kvm->arch.model.ibc;
1415         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1416                S390_ARCH_FAC_LIST_SIZE_BYTE);
1417         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1418                  kvm->arch.model.ibc,
1419                  kvm->arch.model.cpuid);
1420         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1421                  kvm->arch.model.fac_list[0],
1422                  kvm->arch.model.fac_list[1],
1423                  kvm->arch.model.fac_list[2]);
1424         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1425                 ret = -EFAULT;
1426         kfree(proc);
1427 out:
1428         return ret;
1429 }
1430
1431 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1432 {
1433         struct kvm_s390_vm_cpu_machine *mach;
1434         int ret = 0;
1435
1436         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1437         if (!mach) {
1438                 ret = -ENOMEM;
1439                 goto out;
1440         }
1441         get_cpu_id((struct cpuid *) &mach->cpuid);
1442         mach->ibc = sclp.ibc;
1443         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1444                S390_ARCH_FAC_LIST_SIZE_BYTE);
1445         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1446                sizeof(S390_lowcore.stfle_fac_list));
1447         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1448                  kvm->arch.model.ibc,
1449                  kvm->arch.model.cpuid);
1450         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1451                  mach->fac_mask[0],
1452                  mach->fac_mask[1],
1453                  mach->fac_mask[2]);
1454         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1455                  mach->fac_list[0],
1456                  mach->fac_list[1],
1457                  mach->fac_list[2]);
1458         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1459                 ret = -EFAULT;
1460         kfree(mach);
1461 out:
1462         return ret;
1463 }
1464
1465 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1466                                        struct kvm_device_attr *attr)
1467 {
1468         struct kvm_s390_vm_cpu_feat data;
1469
1470         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1471                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1472         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1473                 return -EFAULT;
1474         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1475                          data.feat[0],
1476                          data.feat[1],
1477                          data.feat[2]);
1478         return 0;
1479 }
1480
1481 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1482                                      struct kvm_device_attr *attr)
1483 {
1484         struct kvm_s390_vm_cpu_feat data;
1485
1486         bitmap_copy((unsigned long *) data.feat,
1487                     kvm_s390_available_cpu_feat,
1488                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1489         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1490                 return -EFAULT;
1491         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1492                          data.feat[0],
1493                          data.feat[1],
1494                          data.feat[2]);
1495         return 0;
1496 }
1497
1498 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1499                                           struct kvm_device_attr *attr)
1500 {
1501         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1502             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1503                 return -EFAULT;
1504
1505         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1506                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1507                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1508                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1509                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1510         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1511                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1512                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1513         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1514                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1515                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1516         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1517                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1518                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1519         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1520                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1521                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1522         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1523                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1524                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1525         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1526                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1527                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1528         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1529                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1530                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1531         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1532                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1533                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1534         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1535                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1536                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1537         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1538                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1539                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1540         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1541                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1542                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1543         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1544                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1545                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1546         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1547                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1548                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1549         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1550                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1551                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1552         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1553                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1554                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1555                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1556                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1557         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1558                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1559                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1560                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1561                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1562
1563         return 0;
1564 }
1565
1566 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1567                                         struct kvm_device_attr *attr)
1568 {
1569         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1570             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1571                 return -EFAULT;
1572
1573         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1574                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1575                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1576                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1577                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1578         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1579                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1580                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1581         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1582                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1583                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1584         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1585                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1586                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1587         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1588                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1589                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1590         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1591                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1592                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1593         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1594                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1595                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1596         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1597                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1598                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1599         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1600                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1601                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1602         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1603                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1604                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1605         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1606                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1607                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1608         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1609                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1610                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1611         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1612                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1613                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1614         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1615                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1616                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1617         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1618                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1619                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1620         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1621                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1622                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1623                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1624                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1625         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1626                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1627                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1628                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1629                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1630
1631         return 0;
1632 }
1633
1634 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1635 {
1636         int ret = -ENXIO;
1637
1638         switch (attr->attr) {
1639         case KVM_S390_VM_CPU_PROCESSOR:
1640                 ret = kvm_s390_get_processor(kvm, attr);
1641                 break;
1642         case KVM_S390_VM_CPU_MACHINE:
1643                 ret = kvm_s390_get_machine(kvm, attr);
1644                 break;
1645         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1646                 ret = kvm_s390_get_processor_feat(kvm, attr);
1647                 break;
1648         case KVM_S390_VM_CPU_MACHINE_FEAT:
1649                 ret = kvm_s390_get_machine_feat(kvm, attr);
1650                 break;
1651         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1652                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1653                 break;
1654         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1655                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1656                 break;
1657         }
1658         return ret;
1659 }
1660
1661 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1662 {
1663         int ret;
1664
1665         switch (attr->group) {
1666         case KVM_S390_VM_MEM_CTRL:
1667                 ret = kvm_s390_set_mem_control(kvm, attr);
1668                 break;
1669         case KVM_S390_VM_TOD:
1670                 ret = kvm_s390_set_tod(kvm, attr);
1671                 break;
1672         case KVM_S390_VM_CPU_MODEL:
1673                 ret = kvm_s390_set_cpu_model(kvm, attr);
1674                 break;
1675         case KVM_S390_VM_CRYPTO:
1676                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1677                 break;
1678         case KVM_S390_VM_MIGRATION:
1679                 ret = kvm_s390_vm_set_migration(kvm, attr);
1680                 break;
1681         default:
1682                 ret = -ENXIO;
1683                 break;
1684         }
1685
1686         return ret;
1687 }
1688
1689 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1690 {
1691         int ret;
1692
1693         switch (attr->group) {
1694         case KVM_S390_VM_MEM_CTRL:
1695                 ret = kvm_s390_get_mem_control(kvm, attr);
1696                 break;
1697         case KVM_S390_VM_TOD:
1698                 ret = kvm_s390_get_tod(kvm, attr);
1699                 break;
1700         case KVM_S390_VM_CPU_MODEL:
1701                 ret = kvm_s390_get_cpu_model(kvm, attr);
1702                 break;
1703         case KVM_S390_VM_MIGRATION:
1704                 ret = kvm_s390_vm_get_migration(kvm, attr);
1705                 break;
1706         default:
1707                 ret = -ENXIO;
1708                 break;
1709         }
1710
1711         return ret;
1712 }
1713
1714 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1715 {
1716         int ret;
1717
1718         switch (attr->group) {
1719         case KVM_S390_VM_MEM_CTRL:
1720                 switch (attr->attr) {
1721                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1722                 case KVM_S390_VM_MEM_CLR_CMMA:
1723                         ret = sclp.has_cmma ? 0 : -ENXIO;
1724                         break;
1725                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1726                         ret = 0;
1727                         break;
1728                 default:
1729                         ret = -ENXIO;
1730                         break;
1731                 }
1732                 break;
1733         case KVM_S390_VM_TOD:
1734                 switch (attr->attr) {
1735                 case KVM_S390_VM_TOD_LOW:
1736                 case KVM_S390_VM_TOD_HIGH:
1737                         ret = 0;
1738                         break;
1739                 default:
1740                         ret = -ENXIO;
1741                         break;
1742                 }
1743                 break;
1744         case KVM_S390_VM_CPU_MODEL:
1745                 switch (attr->attr) {
1746                 case KVM_S390_VM_CPU_PROCESSOR:
1747                 case KVM_S390_VM_CPU_MACHINE:
1748                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1749                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1750                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1751                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1752                         ret = 0;
1753                         break;
1754                 default:
1755                         ret = -ENXIO;
1756                         break;
1757                 }
1758                 break;
1759         case KVM_S390_VM_CRYPTO:
1760                 switch (attr->attr) {
1761                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1762                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1763                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1764                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1765                         ret = 0;
1766                         break;
1767                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1768                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1769                         ret = ap_instructions_available() ? 0 : -ENXIO;
1770                         break;
1771                 default:
1772                         ret = -ENXIO;
1773                         break;
1774                 }
1775                 break;
1776         case KVM_S390_VM_MIGRATION:
1777                 ret = 0;
1778                 break;
1779         default:
1780                 ret = -ENXIO;
1781                 break;
1782         }
1783
1784         return ret;
1785 }
1786
1787 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1788 {
1789         uint8_t *keys;
1790         uint64_t hva;
1791         int srcu_idx, i, r = 0;
1792
1793         if (args->flags != 0)
1794                 return -EINVAL;
1795
1796         /* Is this guest using storage keys? */
1797         if (!mm_uses_skeys(current->mm))
1798                 return KVM_S390_GET_SKEYS_NONE;
1799
1800         /* Enforce sane limit on memory allocation */
1801         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1802                 return -EINVAL;
1803
1804         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1805         if (!keys)
1806                 return -ENOMEM;
1807
1808         down_read(&current->mm->mmap_sem);
1809         srcu_idx = srcu_read_lock(&kvm->srcu);
1810         for (i = 0; i < args->count; i++) {
1811                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1812                 if (kvm_is_error_hva(hva)) {
1813                         r = -EFAULT;
1814                         break;
1815                 }
1816
1817                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1818                 if (r)
1819                         break;
1820         }
1821         srcu_read_unlock(&kvm->srcu, srcu_idx);
1822         up_read(&current->mm->mmap_sem);
1823
1824         if (!r) {
1825                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1826                                  sizeof(uint8_t) * args->count);
1827                 if (r)
1828                         r = -EFAULT;
1829         }
1830
1831         kvfree(keys);
1832         return r;
1833 }
1834
1835 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1836 {
1837         uint8_t *keys;
1838         uint64_t hva;
1839         int srcu_idx, i, r = 0;
1840         bool unlocked;
1841
1842         if (args->flags != 0)
1843                 return -EINVAL;
1844
1845         /* Enforce sane limit on memory allocation */
1846         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1847                 return -EINVAL;
1848
1849         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1850         if (!keys)
1851                 return -ENOMEM;
1852
1853         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1854                            sizeof(uint8_t) * args->count);
1855         if (r) {
1856                 r = -EFAULT;
1857                 goto out;
1858         }
1859
1860         /* Enable storage key handling for the guest */
1861         r = s390_enable_skey();
1862         if (r)
1863                 goto out;
1864
1865         i = 0;
1866         down_read(&current->mm->mmap_sem);
1867         srcu_idx = srcu_read_lock(&kvm->srcu);
1868         while (i < args->count) {
1869                 unlocked = false;
1870                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1871                 if (kvm_is_error_hva(hva)) {
1872                         r = -EFAULT;
1873                         break;
1874                 }
1875
1876                 /* Lowest order bit is reserved */
1877                 if (keys[i] & 0x01) {
1878                         r = -EINVAL;
1879                         break;
1880                 }
1881
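                /* if setting the key fails (e.g. the page is not mapped yet), fault it in writable and retry this gfn; i only advances on success */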
1882                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1883                 if (r) {
1884                         r = fixup_user_fault(current, current->mm, hva,
1885                                              FAULT_FLAG_WRITE, &unlocked);
1886                         if (r)
1887                                 break;
1888                 }
1889                 if (!r)
1890                         i++;
1891         }
1892         srcu_read_unlock(&kvm->srcu, srcu_idx);
1893         up_read(&current->mm->mmap_sem);
1894 out:
1895         kvfree(keys);
1896         return r;
1897 }
1898
1899 /*
1900  * Base address and length must be sent at the start of each block; it is
1901  * therefore cheaper to send some clean data, as long as it is less than the
1902  * size of two longs.
1903  */
1904 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
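/* = 16 on s390x: sending up to 16 clean one-byte values costs no more than the 16-byte header of a new block */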
1905 /* for consistency */
1906 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1907
1908 /*
1909  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1910  * address falls in a hole. In that case the index of one of the memslots
1911  * bordering the hole is returned.
1912  */
1913 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1914 {
1915         int start = 0, end = slots->used_slots;
1916         int slot = atomic_read(&slots->lru_slot);
1917         struct kvm_memory_slot *memslots = slots->memslots;
1918
1919         if (gfn >= memslots[slot].base_gfn &&
1920             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1921                 return slot;
1922
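        /*
         * memslots are sorted by descending base_gfn, so the binary search
         * below converges on the slot containing gfn, or on a slot bordering
         * the hole if gfn is unmapped.
         */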
1923         while (start < end) {
1924                 slot = start + (end - start) / 2;
1925
1926                 if (gfn >= memslots[slot].base_gfn)
1927                         end = slot;
1928                 else
1929                         start = slot + 1;
1930         }
1931
1932         if (gfn >= memslots[start].base_gfn &&
1933             gfn < memslots[start].base_gfn + memslots[start].npages) {
1934                 atomic_set(&slots->lru_slot, start);
1935         }
1936
1937         return start;
1938 }
1939
1940 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1941                               u8 *res, unsigned long bufsize)
1942 {
1943         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1944
1945         args->count = 0;
1946         while (args->count < bufsize) {
1947                 hva = gfn_to_hva(kvm, cur_gfn);
1948                 /*
1949                  * We return an error if the first value was invalid, but we
1950                  * return successfully if at least one value was copied.
1951                  */
1952                 if (kvm_is_error_hva(hva))
1953                         return args->count ? 0 : -EFAULT;
1954                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1955                         pgstev = 0;
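                /* keep only the usage state (0x03) and the NODAT bit (0x40) of the PGSTE */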
1956                 res[args->count++] = (pgstev >> 24) & 0x43;
1957                 cur_gfn++;
1958         }
1959
1960         return 0;
1961 }
1962
1963 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1964                                               unsigned long cur_gfn)
1965 {
1966         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1967         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1968         unsigned long ofs = cur_gfn - ms->base_gfn;
1969
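        /* memslots are sorted by descending base_gfn: decrementing slotidx moves to the next higher gfn range */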
1970         if (ms->base_gfn + ms->npages <= cur_gfn) {
1971                 slotidx--;
1972                 /* If we are above the highest slot, wrap around */
1973                 if (slotidx < 0)
1974                         slotidx = slots->used_slots - 1;
1975
1976                 ms = slots->memslots + slotidx;
1977                 ofs = 0;
1978         }
1979         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1980         while ((slotidx > 0) && (ofs >= ms->npages)) {
1981                 slotidx--;
1982                 ms = slots->memslots + slotidx;
1983                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1984         }
1985         return ms->base_gfn + ofs;
1986 }
1987
1988 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1989                              u8 *res, unsigned long bufsize)
1990 {
1991         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1992         struct kvm_memslots *slots = kvm_memslots(kvm);
1993         struct kvm_memory_slot *ms;
1994
1995         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1996         ms = gfn_to_memslot(kvm, cur_gfn);
1997         args->count = 0;
1998         args->start_gfn = cur_gfn;
1999         if (!ms)
2000                 return 0;
2001         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
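        /* memslots[0] has the highest base_gfn, so this marks the end of guest memory */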
2002         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2003
2004         while (args->count < bufsize) {
2005                 hva = gfn_to_hva(kvm, cur_gfn);
2006                 if (kvm_is_error_hva(hva))
2007                         return 0;
2008                 /* Decrement only if we actually flipped the bit to 0 */
2009                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2010                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2011                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2012                         pgstev = 0;
2013                 /* Save the value */
2014                 res[args->count++] = (pgstev >> 24) & 0x43;
2015                 /* If the next bit is too far away, stop. */
2016                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2017                         return 0;
2018                 /* If we reached the previous "next", find the next one */
2019                 if (cur_gfn == next_gfn)
2020                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2021                 /* Reached the end of memory or of the buffer, stop */
2022                 if ((next_gfn >= mem_end) ||
2023                     (next_gfn - args->start_gfn >= bufsize))
2024                         return 0;
2025                 cur_gfn++;
2026                 /* Reached the end of the current memslot, take the next one. */
2027                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2028                         ms = gfn_to_memslot(kvm, cur_gfn);
2029                         if (!ms)
2030                                 return 0;
2031                 }
2032         }
2033         return 0;
2034 }
2035
2036 /*
2037  * This function searches for the next page with dirty CMMA attributes, and
2038  * saves the attributes in the buffer up to either the end of the buffer or
2039  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2040  * no trailing clean bytes are saved.
2041  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2042  * output buffer will indicate 0 as length.
2043  */
2044 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2045                                   struct kvm_s390_cmma_log *args)
2046 {
2047         unsigned long bufsize;
2048         int srcu_idx, peek, ret;
2049         u8 *values;
2050
2051         if (!kvm->arch.use_cmma)
2052                 return -ENXIO;
2053         /* Invalid/unsupported flags were specified */
2054         if (args->flags & ~KVM_S390_CMMA_PEEK)
2055                 return -EINVAL;
2056         /* Migration mode query, and we are not doing a migration */
2057         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2058         if (!peek && !kvm->arch.migration_mode)
2059                 return -EINVAL;
2060         /* CMMA is disabled or was not used, or the buffer has length zero */
2061         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2062         if (!bufsize || !kvm->mm->context.uses_cmm) {
2063                 memset(args, 0, sizeof(*args));
2064                 return 0;
2065         }
2066         /* We are not peeking, and there are no dirty pages */
2067         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2068                 memset(args, 0, sizeof(*args));
2069                 return 0;
2070         }
2071
2072         values = vmalloc(bufsize);
2073         if (!values)
2074                 return -ENOMEM;
2075
2076         down_read(&kvm->mm->mmap_sem);
2077         srcu_idx = srcu_read_lock(&kvm->srcu);
2078         if (peek)
2079                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2080         else
2081                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2082         srcu_read_unlock(&kvm->srcu, srcu_idx);
2083         up_read(&kvm->mm->mmap_sem);
2084
2085         if (kvm->arch.migration_mode)
2086                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2087         else
2088                 args->remaining = 0;
2089
2090         if (copy_to_user((void __user *)args->values, values, args->count))
2091                 ret = -EFAULT;
2092
2093         vfree(values);
2094         return ret;
2095 }
2096
2097 /*
2098  * This function sets the CMMA attributes for the given pages. If the input
2099  * buffer has zero length, no action is taken, otherwise the attributes are
2100  * set and the mm->context.uses_cmm flag is set.
2101  */
2102 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2103                                   const struct kvm_s390_cmma_log *args)
2104 {
2105         unsigned long hva, mask, pgstev, i;
2106         uint8_t *bits;
2107         int srcu_idx, r = 0;
2108
2109         mask = args->mask;
2110
2111         if (!kvm->arch.use_cmma)
2112                 return -ENXIO;
2113         /* invalid/unsupported flags */
2114         if (args->flags != 0)
2115                 return -EINVAL;
2116         /* Enforce sane limit on memory allocation */
2117         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2118                 return -EINVAL;
2119         /* Nothing to do */
2120         if (args->count == 0)
2121                 return 0;
2122
2123         bits = vmalloc(array_size(sizeof(*bits), args->count));
2124         if (!bits)
2125                 return -ENOMEM;
2126
2127         r = copy_from_user(bits, (void __user *)args->values, args->count);
2128         if (r) {
2129                 r = -EFAULT;
2130                 goto out;
2131         }
2132
2133         down_read(&kvm->mm->mmap_sem);
2134         srcu_idx = srcu_read_lock(&kvm->srcu);
2135         for (i = 0; i < args->count; i++) {
2136                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2137                 if (kvm_is_error_hva(hva)) {
2138                         r = -EFAULT;
2139                         break;
2140                 }
2141
2142                 pgstev = bits[i];
2143                 pgstev = pgstev << 24;
2144                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2145                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2146         }
2147         srcu_read_unlock(&kvm->srcu, srcu_idx);
2148         up_read(&kvm->mm->mmap_sem);
2149
2150         if (!kvm->mm->context.uses_cmm) {
2151                 down_write(&kvm->mm->mmap_sem);
2152                 kvm->mm->context.uses_cmm = 1;
2153                 up_write(&kvm->mm->mmap_sem);
2154         }
2155 out:
2156         vfree(bits);
2157         return r;
2158 }
2159
2160 long kvm_arch_vm_ioctl(struct file *filp,
2161                        unsigned int ioctl, unsigned long arg)
2162 {
2163         struct kvm *kvm = filp->private_data;
2164         void __user *argp = (void __user *)arg;
2165         struct kvm_device_attr attr;
2166         int r;
2167
2168         switch (ioctl) {
2169         case KVM_S390_INTERRUPT: {
2170                 struct kvm_s390_interrupt s390int;
2171
2172                 r = -EFAULT;
2173                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2174                         break;
2175                 r = kvm_s390_inject_vm(kvm, &s390int);
2176                 break;
2177         }
2178         case KVM_CREATE_IRQCHIP: {
2179                 struct kvm_irq_routing_entry routing;
2180
2181                 r = -EINVAL;
2182                 if (kvm->arch.use_irqchip) {
2183                         /* Set up dummy routing. */
2184                         memset(&routing, 0, sizeof(routing));
2185                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2186                 }
2187                 break;
2188         }
2189         case KVM_SET_DEVICE_ATTR: {
2190                 r = -EFAULT;
2191                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2192                         break;
2193                 r = kvm_s390_vm_set_attr(kvm, &attr);
2194                 break;
2195         }
2196         case KVM_GET_DEVICE_ATTR: {
2197                 r = -EFAULT;
2198                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2199                         break;
2200                 r = kvm_s390_vm_get_attr(kvm, &attr);
2201                 break;
2202         }
2203         case KVM_HAS_DEVICE_ATTR: {
2204                 r = -EFAULT;
2205                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2206                         break;
2207                 r = kvm_s390_vm_has_attr(kvm, &attr);
2208                 break;
2209         }
2210         case KVM_S390_GET_SKEYS: {
2211                 struct kvm_s390_skeys args;
2212
2213                 r = -EFAULT;
2214                 if (copy_from_user(&args, argp,
2215                                    sizeof(struct kvm_s390_skeys)))
2216                         break;
2217                 r = kvm_s390_get_skeys(kvm, &args);
2218                 break;
2219         }
2220         case KVM_S390_SET_SKEYS: {
2221                 struct kvm_s390_skeys args;
2222
2223                 r = -EFAULT;
2224                 if (copy_from_user(&args, argp,
2225                                    sizeof(struct kvm_s390_skeys)))
2226                         break;
2227                 r = kvm_s390_set_skeys(kvm, &args);
2228                 break;
2229         }
2230         case KVM_S390_GET_CMMA_BITS: {
2231                 struct kvm_s390_cmma_log args;
2232
2233                 r = -EFAULT;
2234                 if (copy_from_user(&args, argp, sizeof(args)))
2235                         break;
2236                 mutex_lock(&kvm->slots_lock);
2237                 r = kvm_s390_get_cmma_bits(kvm, &args);
2238                 mutex_unlock(&kvm->slots_lock);
2239                 if (!r) {
2240                         r = copy_to_user(argp, &args, sizeof(args));
2241                         if (r)
2242                                 r = -EFAULT;
2243                 }
2244                 break;
2245         }
2246         case KVM_S390_SET_CMMA_BITS: {
2247                 struct kvm_s390_cmma_log args;
2248
2249                 r = -EFAULT;
2250                 if (copy_from_user(&args, argp, sizeof(args)))
2251                         break;
2252                 mutex_lock(&kvm->slots_lock);
2253                 r = kvm_s390_set_cmma_bits(kvm, &args);
2254                 mutex_unlock(&kvm->slots_lock);
2255                 break;
2256         }
2257         default:
2258                 r = -ENOTTY;
2259         }
2260
2261         return r;
2262 }
2263
2264 static int kvm_s390_apxa_installed(void)
2265 {
2266         struct ap_config_info info;
2267
2268         if (ap_instructions_available()) {
2269                 if (ap_qci(&info) == 0)
2270                         return info.apxa;
2271         }
2272
2273         return 0;
2274 }
2275
2276 /*
2277  * The format of the crypto control block (CRYCB) is specified in the 3 low
2278  * order bits of the CRYCB designation (CRYCBD) field as follows:
2279  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2280  *           AP extended addressing (APXA) facility are installed.
2281  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2282  * Format 2: Both the APXA and MSAX3 facilities are installed.
2283  */
2284 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2285 {
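        /* the CRYCB designation holds the crycb address; the crycb lives in sie_page2 (allocated with GFP_DMA, i.e. below 2 GB), so it fits the 32-bit field */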
2286         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2287
2288         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2289         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2290
2291         /* Check whether MSAX3 is installed */
2292         if (!test_kvm_facility(kvm, 76))
2293                 return;
2294
2295         if (kvm_s390_apxa_installed())
2296                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2297         else
2298                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2299 }
2300
2301 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2302                                unsigned long *aqm, unsigned long *adm)
2303 {
2304         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2305
2306         mutex_lock(&kvm->lock);
2307         kvm_s390_vcpu_block_all(kvm);
2308
2309         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2310         case CRYCB_FORMAT2: /* APCB1 uses 256-bit masks */
2311                 memcpy(crycb->apcb1.apm, apm, 32);
2312                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2313                          apm[0], apm[1], apm[2], apm[3]);
2314                 memcpy(crycb->apcb1.aqm, aqm, 32);
2315                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2316                          aqm[0], aqm[1], aqm[2], aqm[3]);
2317                 memcpy(crycb->apcb1.adm, adm, 32);
2318                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2319                          adm[0], adm[1], adm[2], adm[3]);
2320                 break;
2321         case CRYCB_FORMAT1:
2322         case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
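                /* APCB0 masks: 64-bit apm, 16-bit aqm and adm */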
2323                 memcpy(crycb->apcb0.apm, apm, 8);
2324                 memcpy(crycb->apcb0.aqm, aqm, 2);
2325                 memcpy(crycb->apcb0.adm, adm, 2);
2326                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2327                          apm[0], *((unsigned short *)aqm),
2328                          *((unsigned short *)adm));
2329                 break;
2330         default:        /* Cannot happen */
2331                 break;
2332         }
2333
2334         /* recreate the shadow crycb for each vcpu */
2335         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2336         kvm_s390_vcpu_unblock_all(kvm);
2337         mutex_unlock(&kvm->lock);
2338 }
2339 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2340
2341 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2342 {
2343         mutex_lock(&kvm->lock);
2344         kvm_s390_vcpu_block_all(kvm);
2345
2346         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2347                sizeof(kvm->arch.crypto.crycb->apcb0));
2348         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2349                sizeof(kvm->arch.crypto.crycb->apcb1));
2350
2351         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2352         /* recreate the shadow crycb for each vcpu */
2353         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2354         kvm_s390_vcpu_unblock_all(kvm);
2355         mutex_unlock(&kvm->lock);
2356 }
2357 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2358
2359 static u64 kvm_s390_get_initial_cpuid(void)
2360 {
2361         struct cpuid cpuid;
2362
2363         get_cpu_id(&cpuid);
2364         cpuid.version = 0xff;
2365         return *((u64 *) &cpuid);
2366 }
2367
2368 static void kvm_s390_crypto_init(struct kvm *kvm)
2369 {
2370         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2371         kvm_s390_set_crycb_format(kvm);
2372
2373         if (!test_kvm_facility(kvm, 76))
2374                 return;
2375
2376         /* Enable AES/DEA protected key functions by default */
2377         kvm->arch.crypto.aes_kw = 1;
2378         kvm->arch.crypto.dea_kw = 1;
2379         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2380                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2381         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2382                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2383 }
2384
2385 static void sca_dispose(struct kvm *kvm)
2386 {
2387         if (kvm->arch.use_esca)
2388                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2389         else
2390                 free_page((unsigned long)(kvm->arch.sca));
2391         kvm->arch.sca = NULL;
2392 }
2393
2394 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2395 {
2396         gfp_t alloc_flags = GFP_KERNEL;
2397         int i, rc;
2398         char debug_name[16];
2399         static unsigned long sca_offset;
2400
2401         rc = -EINVAL;
2402 #ifdef CONFIG_KVM_S390_UCONTROL
2403         if (type & ~KVM_VM_S390_UCONTROL)
2404                 goto out_err;
2405         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2406                 goto out_err;
2407 #else
2408         if (type)
2409                 goto out_err;
2410 #endif
2411
2412         rc = s390_enable_sie();
2413         if (rc)
2414                 goto out_err;
2415
2416         rc = -ENOMEM;
2417
2418         if (!sclp.has_64bscao)
2419                 alloc_flags |= GFP_DMA;
2420         rwlock_init(&kvm->arch.sca_lock);
2421         /* start with basic SCA */
2422         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2423         if (!kvm->arch.sca)
2424                 goto out_err;
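        /* stagger the basic SCA of each new VM by 16 bytes within its page */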
2425         mutex_lock(&kvm_lock);
2426         sca_offset += 16;
2427         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2428                 sca_offset = 0;
2429         kvm->arch.sca = (struct bsca_block *)
2430                         ((char *) kvm->arch.sca + sca_offset);
2431         mutex_unlock(&kvm_lock);
2432
2433         sprintf(debug_name, "kvm-%u", current->pid);
2434
2435         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2436         if (!kvm->arch.dbf)
2437                 goto out_err;
2438
2439         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2440         kvm->arch.sie_page2 =
2441              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2442         if (!kvm->arch.sie_page2)
2443                 goto out_err;
2444
2445         kvm->arch.sie_page2->kvm = kvm;
2446         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2447
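        /*
         * fac_mask: facilities userspace may enable for the guest (host
         * facilities limited to the known base and extension lists).
         * fac_list: facilities a guest gets by default (base list only).
         */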
2448         for (i = 0; i < kvm_s390_fac_size(); i++) {
2449                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2450                                               (kvm_s390_fac_base[i] |
2451                                                kvm_s390_fac_ext[i]);
2452                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2453                                               kvm_s390_fac_base[i];
2454         }
2455         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2456
2457         /* we are always in czam mode - even on pre z14 machines */
2458         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2459         set_kvm_facility(kvm->arch.model.fac_list, 138);
2460         /* we emulate STHYI in kvm */
2461         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2462         set_kvm_facility(kvm->arch.model.fac_list, 74);
2463         if (MACHINE_HAS_TLB_GUEST) {
2464                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2465                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2466         }
2467
2468         if (css_general_characteristics.aiv && test_facility(65))
2469                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2470
2471         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2472         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2473
2474         kvm_s390_crypto_init(kvm);
2475
2476         mutex_init(&kvm->arch.float_int.ais_lock);
2477         spin_lock_init(&kvm->arch.float_int.lock);
2478         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2479                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2480         init_waitqueue_head(&kvm->arch.ipte_wq);
2481         mutex_init(&kvm->arch.ipte_mutex);
2482
2483         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2484         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2485
2486         if (type & KVM_VM_S390_UCONTROL) {
2487                 kvm->arch.gmap = NULL;
2488                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2489         } else {
2490                 if (sclp.hamax == U64_MAX)
2491                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2492                 else
2493                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2494                                                     sclp.hamax + 1);
2495                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2496                 if (!kvm->arch.gmap)
2497                         goto out_err;
2498                 kvm->arch.gmap->private = kvm;
2499                 kvm->arch.gmap->pfault_enabled = 0;
2500         }
2501
2502         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2503         kvm->arch.use_skf = sclp.has_skey;
2504         spin_lock_init(&kvm->arch.start_stop_lock);
2505         kvm_s390_vsie_init(kvm);
2506         kvm_s390_gisa_init(kvm);
2507         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2508
2509         return 0;
2510 out_err:
2511         free_page((unsigned long)kvm->arch.sie_page2);
2512         debug_unregister(kvm->arch.dbf);
2513         sca_dispose(kvm);
2514         KVM_EVENT(3, "creation of vm failed: %d", rc);
2515         return rc;
2516 }
2517
2518 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2519 {
2520         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2521         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2522         kvm_s390_clear_local_irqs(vcpu);
2523         kvm_clear_async_pf_completion_queue(vcpu);
2524         if (!kvm_is_ucontrol(vcpu->kvm))
2525                 sca_del_vcpu(vcpu);
2526
2527         if (kvm_is_ucontrol(vcpu->kvm))
2528                 gmap_remove(vcpu->arch.gmap);
2529
2530         if (vcpu->kvm->arch.use_cmma)
2531                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2532         free_page((unsigned long)(vcpu->arch.sie_block));
2533 }
2534
2535 static void kvm_free_vcpus(struct kvm *kvm)
2536 {
2537         unsigned int i;
2538         struct kvm_vcpu *vcpu;
2539
2540         kvm_for_each_vcpu(i, vcpu, kvm)
2541                 kvm_vcpu_destroy(vcpu);
2542
2543         mutex_lock(&kvm->lock);
2544         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2545                 kvm->vcpus[i] = NULL;
2546
2547         atomic_set(&kvm->online_vcpus, 0);
2548         mutex_unlock(&kvm->lock);
2549 }
2550
2551 void kvm_arch_destroy_vm(struct kvm *kvm)
2552 {
2553         kvm_free_vcpus(kvm);
2554         sca_dispose(kvm);
2555         debug_unregister(kvm->arch.dbf);
2556         kvm_s390_gisa_destroy(kvm);
2557         free_page((unsigned long)kvm->arch.sie_page2);
2558         if (!kvm_is_ucontrol(kvm))
2559                 gmap_remove(kvm->arch.gmap);
2560         kvm_s390_destroy_adapters(kvm);
2561         kvm_s390_clear_float_irqs(kvm);
2562         kvm_s390_vsie_destroy(kvm);
2563         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2564 }
2565
2566 /* Section: vcpu related */
2567 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2568 {
2569         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2570         if (!vcpu->arch.gmap)
2571                 return -ENOMEM;
2572         vcpu->arch.gmap->private = vcpu->kvm;
2573
2574         return 0;
2575 }
2576
2577 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2578 {
2579         if (!kvm_s390_use_sca_entries())
2580                 return;
2581         read_lock(&vcpu->kvm->arch.sca_lock);
2582         if (vcpu->kvm->arch.use_esca) {
2583                 struct esca_block *sca = vcpu->kvm->arch.sca;
2584
2585                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2586                 sca->cpu[vcpu->vcpu_id].sda = 0;
2587         } else {
2588                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2589
2590                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2591                 sca->cpu[vcpu->vcpu_id].sda = 0;
2592         }
2593         read_unlock(&vcpu->kvm->arch.sca_lock);
2594 }
2595
2596 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2597 {
2598         if (!kvm_s390_use_sca_entries()) {
2599                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2600
2601                 /* we still need the basic sca for the ipte control */
2602                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2603                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2604                 return;
2605         }
2606         read_lock(&vcpu->kvm->arch.sca_lock);
2607         if (vcpu->kvm->arch.use_esca) {
2608                 struct esca_block *sca = vcpu->kvm->arch.sca;
2609
2610                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2611                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2612                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2613                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2614                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2615         } else {
2616                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2617
2618                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2619                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2620                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2621                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2622         }
2623         read_unlock(&vcpu->kvm->arch.sca_lock);
2624 }
2625
2626 /* Basic SCA to Extended SCA data copy routines */
2627 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2628 {
2629         d->sda = s->sda;
2630         d->sigp_ctrl.c = s->sigp_ctrl.c;
2631         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2632 }
2633
2634 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2635 {
2636         int i;
2637
2638         d->ipte_control = s->ipte_control;
2639         d->mcn[0] = s->mcn;
2640         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2641                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2642 }
2643
2644 static int sca_switch_to_extended(struct kvm *kvm)
2645 {
2646         struct bsca_block *old_sca = kvm->arch.sca;
2647         struct esca_block *new_sca;
2648         struct kvm_vcpu *vcpu;
2649         unsigned int vcpu_idx;
2650         u32 scaol, scaoh;
2651
2652         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2653         if (!new_sca)
2654                 return -ENOMEM;
2655
2656         scaoh = (u32)((u64)(new_sca) >> 32);
2657         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2658
2659         kvm_s390_vcpu_block_all(kvm);
2660         write_lock(&kvm->arch.sca_lock);
2661
2662         sca_copy_b_to_e(new_sca, old_sca);
2663
2664         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2665                 vcpu->arch.sie_block->scaoh = scaoh;
2666                 vcpu->arch.sie_block->scaol = scaol;
2667                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2668         }
2669         kvm->arch.sca = new_sca;
2670         kvm->arch.use_esca = 1;
2671
2672         write_unlock(&kvm->arch.sca_lock);
2673         kvm_s390_vcpu_unblock_all(kvm);
2674
2675         free_page((unsigned long)old_sca);
2676
2677         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2678                  old_sca, kvm->arch.sca);
2679         return 0;
2680 }
2681
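/*
 * Check whether a vcpu with the given id fits into the current SCA.
 * Without SCA entries only the id range matters; otherwise ids beyond the
 * basic SCA slots require the ESCA and 64-bit SCA origin facilities and
 * trigger the switch to the extended SCA.
 */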
2682 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2683 {
2684         int rc;
2685
2686         if (!kvm_s390_use_sca_entries()) {
2687                 if (id < KVM_MAX_VCPUS)
2688                         return true;
2689                 return false;
2690         }
2691         if (id < KVM_S390_BSCA_CPU_SLOTS)
2692                 return true;
2693         if (!sclp.has_esca || !sclp.has_64bscao)
2694                 return false;
2695
2696         mutex_lock(&kvm->lock);
2697         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2698         mutex_unlock(&kvm->lock);
2699
2700         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2701 }
2702
2703 /* needs preemption disabled to protect against TOD sync and vcpu_load/put */
2704 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2705 {
2706         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2707         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2708         vcpu->arch.cputm_start = get_tod_clock_fast();
2709         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2710 }
2711
2712 /* needs preemption disabled to protect against TOD sync and vcpu_load/put */
2713 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2714 {
2715         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2716         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2717         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2718         vcpu->arch.cputm_start = 0;
2719         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2720 }
2721
2722 /* needs preemption disabled to protect against TOD sync and vcpu_load/put */
2723 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2724 {
2725         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2726         vcpu->arch.cputm_enabled = true;
2727         __start_cpu_timer_accounting(vcpu);
2728 }
2729
2730 /* needs preemption disabled to protect against TOD sync and vcpu_load/put */
2731 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2732 {
2733         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2734         __stop_cpu_timer_accounting(vcpu);
2735         vcpu->arch.cputm_enabled = false;
2736 }
2737
2738 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2739 {
2740         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2741         __enable_cpu_timer_accounting(vcpu);
2742         preempt_enable();
2743 }
2744
2745 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2746 {
2747         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2748         __disable_cpu_timer_accounting(vcpu);
2749         preempt_enable();
2750 }
2751
2752 /* set the cpu timer - may only be called from the VCPU thread itself */
2753 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2754 {
2755         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2756         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2757         if (vcpu->arch.cputm_enabled)
2758                 vcpu->arch.cputm_start = get_tod_clock_fast();
2759         vcpu->arch.sie_block->cputm = cputm;
2760         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2761         preempt_enable();
2762 }
2763
2764 /* update and get the cpu timer - can also be called from other VCPU threads */
2765 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2766 {
2767         unsigned int seq;
2768         __u64 value;
2769
2770         if (unlikely(!vcpu->arch.cputm_enabled))
2771                 return vcpu->arch.sie_block->cputm;
2772
2773         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2774         do {
2775                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2776                 /*
2777                  * If the writer would ever execute a read in the critical
2778                  * section, e.g. in irq context, we have a deadlock.
2779                  */
2780                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2781                 value = vcpu->arch.sie_block->cputm;
2782                 /* if cputm_start is 0, accounting is being started/stopped */
2783                 if (likely(vcpu->arch.cputm_start))
2784                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2785         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2786         preempt_enable();
2787         return value;
2788 }
2789
2790 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2791 {
2792
2793         gmap_enable(vcpu->arch.enabled_gmap);
2794         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2795         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2796                 __start_cpu_timer_accounting(vcpu);
2797         vcpu->cpu = cpu;
2798 }
2799
2800 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2801 {
2802         vcpu->cpu = -1;
2803         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2804                 __stop_cpu_timer_accounting(vcpu);
2805         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2806         vcpu->arch.enabled_gmap = gmap_get_enabled();
2807         gmap_disable(vcpu->arch.enabled_gmap);
2808
2809 }
2810
2811 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2812 {
2813         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2814         vcpu->arch.sie_block->gpsw.mask = 0UL;
2815         vcpu->arch.sie_block->gpsw.addr = 0UL;
2816         kvm_s390_set_prefix(vcpu, 0);
2817         kvm_s390_set_cpu_timer(vcpu, 0);
2818         vcpu->arch.sie_block->ckc       = 0UL;
2819         vcpu->arch.sie_block->todpr     = 0;
2820         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2821         vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2822                                         CR0_INTERRUPT_KEY_SUBMASK |
2823                                         CR0_MEASUREMENT_ALERT_SUBMASK;
2824         vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2825                                         CR14_UNUSED_33 |
2826                                         CR14_EXTERNAL_DAMAGE_SUBMASK;
2827         /* make sure the new fpc will be lazily loaded */
2828         save_fpu_regs();
2829         current->thread.fpu.fpc = 0;
2830         vcpu->arch.sie_block->gbea = 1;
2831         vcpu->arch.sie_block->pp = 0;
2832         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2833         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2834         kvm_clear_async_pf_completion_queue(vcpu);
2835         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2836                 kvm_s390_vcpu_stop(vcpu);
2837         kvm_s390_clear_local_irqs(vcpu);
2838 }
2839
2840 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2841 {
2842         mutex_lock(&vcpu->kvm->lock);
2843         preempt_disable();
2844         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2845         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2846         preempt_enable();
2847         mutex_unlock(&vcpu->kvm->lock);
2848         if (!kvm_is_ucontrol(vcpu->kvm)) {
2849                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2850                 sca_add_vcpu(vcpu);
2851         }
2852         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2853                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2854         /* make vcpu_load load the right gmap on the first trigger */
2855         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2856 }
2857
2858 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
2859 {
2860         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
2861             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
2862                 return true;
2863         return false;
2864 }
2865
2866 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
2867 {
2868         /* At least one ECC subfunction must be present */
2869         return kvm_has_pckmo_subfunc(kvm, 32) ||
2870                kvm_has_pckmo_subfunc(kvm, 33) ||
2871                kvm_has_pckmo_subfunc(kvm, 34) ||
2872                kvm_has_pckmo_subfunc(kvm, 40) ||
2873                kvm_has_pckmo_subfunc(kvm, 41);
2874
2875 }
2876
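/*
 * Propagate the VM-wide crypto configuration into this vcpu's SIE block:
 * crycb origin, AP instruction interpretation (ECA_APIE), protected-key
 * AES/DEA wrapping (ECB3_AES/ECB3_DEA) and, where available, ECC protected
 * keys (ECD_ECC).
 */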
2877 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2878 {
2879         /*
2880          * If the AP instructions are not being interpreted and the MSAX3
2881          * facility is not configured for the guest, there is nothing to set up.
2882          */
2883         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2884                 return;
2885
2886         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2887         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2888         vcpu->arch.sie_block->eca &= ~ECA_APIE;
2889         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
2890
2891         if (vcpu->kvm->arch.crypto.apie)
2892                 vcpu->arch.sie_block->eca |= ECA_APIE;
2893
2894         /* Set up protected key support */
2895         if (vcpu->kvm->arch.crypto.aes_kw) {
2896                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2897                 /* ecc is also wrapped with AES key */
2898                 if (kvm_has_pckmo_ecc(vcpu->kvm))
2899                         vcpu->arch.sie_block->ecd |= ECD_ECC;
2900         }
2901
2902         if (vcpu->kvm->arch.crypto.dea_kw)
2903                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2904 }
2905
2906 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2907 {
2908         free_page(vcpu->arch.sie_block->cbrlo);
2909         vcpu->arch.sie_block->cbrlo = 0;
2910 }
2911
2912 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2913 {
2914         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2915         if (!vcpu->arch.sie_block->cbrlo)
2916                 return -ENOMEM;
2917         return 0;
2918 }
2919
2920 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2921 {
2922         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2923
2924         vcpu->arch.sie_block->ibc = model->ibc;
2925         if (test_kvm_facility(vcpu->kvm, 7))
2926                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2927 }
2928
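/*
 * Initialize the SIE control block of a newly created vcpu: cpuflags, CPU
 * model data, execution controls matching the available SCLP and KVM
 * facilities, the CMMA buffer if CMMA is in use, the clock comparator
 * timer and the crypto settings.
 */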
2929 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
2930 {
2931         int rc = 0;
2932
2933         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2934                                                     CPUSTAT_SM |
2935                                                     CPUSTAT_STOPPED);
2936
2937         if (test_kvm_facility(vcpu->kvm, 78))
2938                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2939         else if (test_kvm_facility(vcpu->kvm, 8))
2940                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2941
2942         kvm_s390_vcpu_setup_model(vcpu);
2943
2944         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2945         if (MACHINE_HAS_ESOP)
2946                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2947         if (test_kvm_facility(vcpu->kvm, 9))
2948                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2949         if (test_kvm_facility(vcpu->kvm, 73))
2950                 vcpu->arch.sie_block->ecb |= ECB_TE;
2951
2952         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2953                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2954         if (test_kvm_facility(vcpu->kvm, 130))
2955                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2956         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2957         if (sclp.has_cei)
2958                 vcpu->arch.sie_block->eca |= ECA_CEI;
2959         if (sclp.has_ib)
2960                 vcpu->arch.sie_block->eca |= ECA_IB;
2961         if (sclp.has_siif)
2962                 vcpu->arch.sie_block->eca |= ECA_SII;
2963         if (sclp.has_sigpif)
2964                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2965         if (test_kvm_facility(vcpu->kvm, 129)) {
2966                 vcpu->arch.sie_block->eca |= ECA_VX;
2967                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2968         }
2969         if (test_kvm_facility(vcpu->kvm, 139))
2970                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2971         if (test_kvm_facility(vcpu->kvm, 156))
2972                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2973         if (vcpu->arch.sie_block->gd) {
2974                 vcpu->arch.sie_block->eca |= ECA_AIV;
2975                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2976                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2977         }
2978         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2979                                         | SDNXC;
2980         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2981
2982         if (sclp.has_kss)
2983                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2984         else
2985                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2986
2987         if (vcpu->kvm->arch.use_cmma) {
2988                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2989                 if (rc)
2990                         return rc;
2991         }
2992         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2993         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2994
2995         vcpu->arch.sie_block->hpid = HPID_KVM;
2996
2997         kvm_s390_vcpu_crypto_setup(vcpu);
2998
2999         return rc;
3000 }
3001
3002 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3003 {
3004         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3005                 return -EINVAL;
3006         return 0;
3007 }
3008
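/*
 * Allocate and set up the SIE block page for a new vcpu: guest address
 * range, GISA origin and the set of registers that can be synced via
 * kvm_run, followed by kvm_s390_vcpu_setup(). For ucontrol VMs a per-vcpu
 * gmap is created as well.
 */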
3009 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3010 {
3011         struct sie_page *sie_page;
3012         int rc;
3013
3014         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3015         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3016         if (!sie_page)
3017                 return -ENOMEM;
3018
3019         vcpu->arch.sie_block = &sie_page->sie_block;
3020         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3021
3022         /* the real guest size will always be smaller than msl */
3023         vcpu->arch.sie_block->mso = 0;
3024         vcpu->arch.sie_block->msl = sclp.hamax;
3025
3026         vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3027         spin_lock_init(&vcpu->arch.local_int.lock);
3028         vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3029         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3030                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3031         seqcount_init(&vcpu->arch.cputm_seqcount);
3032
3033         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3034         kvm_clear_async_pf_completion_queue(vcpu);
3035         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3036                                     KVM_SYNC_GPRS |
3037                                     KVM_SYNC_ACRS |
3038                                     KVM_SYNC_CRS |
3039                                     KVM_SYNC_ARCH0 |
3040                                     KVM_SYNC_PFAULT;
3041         kvm_s390_set_prefix(vcpu, 0);
3042         if (test_kvm_facility(vcpu->kvm, 64))
3043                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3044         if (test_kvm_facility(vcpu->kvm, 82))
3045                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3046         if (test_kvm_facility(vcpu->kvm, 133))
3047                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3048         if (test_kvm_facility(vcpu->kvm, 156))
3049                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3050         /* fprs can be synchronized via vrs, even if the guest has no vx. With
3051          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3052          */
3053         if (MACHINE_HAS_VX)
3054                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3055         else
3056                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3057
3058         if (kvm_is_ucontrol(vcpu->kvm)) {
3059                 rc = __kvm_ucontrol_vcpu_init(vcpu);
3060                 if (rc)
3061                         goto out_free_sie_block;
3062         }
3063
3064         VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3065                  vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3066         trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3067
3068         rc = kvm_s390_vcpu_setup(vcpu);
3069         if (rc)
3070                 goto out_ucontrol_uninit;
3071         return 0;
3072
3073 out_ucontrol_uninit:
3074         if (kvm_is_ucontrol(vcpu->kvm))
3075                 gmap_remove(vcpu->arch.gmap);
3076 out_free_sie_block:
3077         free_page((unsigned long)(vcpu->arch.sie_block));
3078         return rc;
3079 }
3080
3081 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3082 {
3083         return kvm_s390_vcpu_has_irq(vcpu, 0);
3084 }
3085
3086 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3087 {
3088         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3089 }
3090
3091 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3092 {
3093         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3094         exit_sie(vcpu);
3095 }
3096
3097 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3098 {
3099         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3100 }
3101
3102 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3103 {
3104         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3105         exit_sie(vcpu);
3106 }
3107
3108 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3109 {
3110         return atomic_read(&vcpu->arch.sie_block->prog20) &
3111                (PROG_BLOCK_SIE | PROG_REQUEST);
3112 }
3113
3114 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3115 {
3116         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3117 }
3118
3119 /*
3120  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3121  * If the CPU is not running (e.g. waiting as idle), the function will
3122  * return immediately. */
3123 void exit_sie(struct kvm_vcpu *vcpu)
3124 {
3125         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3126         kvm_s390_vsie_kick(vcpu);
3127         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3128                 cpu_relax();
3129 }
3130
3131 /* Kick a guest cpu out of SIE to process a request synchronously */
3132 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3133 {
3134         kvm_make_request(req, vcpu);
3135         kvm_s390_vcpu_request(vcpu);
3136 }
3137
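/*
 * gmap invalidation notifier: if the invalidated range overlaps a vcpu's
 * prefix pages, request a MMU reload so that the prefix mapping and its
 * ipte notifier are re-established before the vcpu reenters SIE.
 */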
3138 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3139                               unsigned long end)
3140 {
3141         struct kvm *kvm = gmap->private;
3142         struct kvm_vcpu *vcpu;
3143         unsigned long prefix;
3144         int i;
3145
3146         if (gmap_is_shadow(gmap))
3147                 return;
3148         if (start >= 1UL << 31)
3149                 /* We are only interested in prefix pages */
3150                 return;
3151         kvm_for_each_vcpu(i, vcpu, kvm) {
3152                 /* match against both prefix pages */
3153                 prefix = kvm_s390_get_prefix(vcpu);
3154                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3155                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3156                                    start, end);
3157                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3158                 }
3159         }
3160 }
3161
3162 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3163 {
3164         /* do not poll with more than halt_poll_max_steal percent of steal time */
3165         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3166             halt_poll_max_steal) {
3167                 vcpu->stat.halt_no_poll_steal++;
3168                 return true;
3169         }
3170         return false;
3171 }
3172
3173 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3174 {
3175         /* kvm common code refers to this, but never calls it */
3176         BUG();
3177         return 0;
3178 }
3179
3180 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3181                                            struct kvm_one_reg *reg)
3182 {
3183         int r = -EINVAL;
3184
3185         switch (reg->id) {
3186         case KVM_REG_S390_TODPR:
3187                 r = put_user(vcpu->arch.sie_block->todpr,
3188                              (u32 __user *)reg->addr);
3189                 break;
3190         case KVM_REG_S390_EPOCHDIFF:
3191                 r = put_user(vcpu->arch.sie_block->epoch,
3192                              (u64 __user *)reg->addr);
3193                 break;
3194         case KVM_REG_S390_CPU_TIMER:
3195                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3196                              (u64 __user *)reg->addr);
3197                 break;
3198         case KVM_REG_S390_CLOCK_COMP:
3199                 r = put_user(vcpu->arch.sie_block->ckc,
3200                              (u64 __user *)reg->addr);
3201                 break;
3202         case KVM_REG_S390_PFTOKEN:
3203                 r = put_user(vcpu->arch.pfault_token,
3204                              (u64 __user *)reg->addr);
3205                 break;
3206         case KVM_REG_S390_PFCOMPARE:
3207                 r = put_user(vcpu->arch.pfault_compare,
3208                              (u64 __user *)reg->addr);
3209                 break;
3210         case KVM_REG_S390_PFSELECT:
3211                 r = put_user(vcpu->arch.pfault_select,
3212                              (u64 __user *)reg->addr);
3213                 break;
3214         case KVM_REG_S390_PP:
3215                 r = put_user(vcpu->arch.sie_block->pp,
3216                              (u64 __user *)reg->addr);
3217                 break;
3218         case KVM_REG_S390_GBEA:
3219                 r = put_user(vcpu->arch.sie_block->gbea,
3220                              (u64 __user *)reg->addr);
3221                 break;
3222         default:
3223                 break;
3224         }
3225
3226         return r;
3227 }
3228
3229 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3230                                            struct kvm_one_reg *reg)
3231 {
3232         int r = -EINVAL;
3233         __u64 val;
3234
3235         switch (reg->id) {
3236         case KVM_REG_S390_TODPR:
3237                 r = get_user(vcpu->arch.sie_block->todpr,
3238                              (u32 __user *)reg->addr);
3239                 break;
3240         case KVM_REG_S390_EPOCHDIFF:
3241                 r = get_user(vcpu->arch.sie_block->epoch,
3242                              (u64 __user *)reg->addr);
3243                 break;
3244         case KVM_REG_S390_CPU_TIMER:
3245                 r = get_user(val, (u64 __user *)reg->addr);
3246                 if (!r)
3247                         kvm_s390_set_cpu_timer(vcpu, val);
3248                 break;
3249         case KVM_REG_S390_CLOCK_COMP:
3250                 r = get_user(vcpu->arch.sie_block->ckc,
3251                              (u64 __user *)reg->addr);
3252                 break;
3253         case KVM_REG_S390_PFTOKEN:
3254                 r = get_user(vcpu->arch.pfault_token,
3255                              (u64 __user *)reg->addr);
3256                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3257                         kvm_clear_async_pf_completion_queue(vcpu);
3258                 break;
3259         case KVM_REG_S390_PFCOMPARE:
3260                 r = get_user(vcpu->arch.pfault_compare,
3261                              (u64 __user *)reg->addr);
3262                 break;
3263         case KVM_REG_S390_PFSELECT:
3264                 r = get_user(vcpu->arch.pfault_select,
3265                              (u64 __user *)reg->addr);
3266                 break;
3267         case KVM_REG_S390_PP:
3268                 r = get_user(vcpu->arch.sie_block->pp,
3269                              (u64 __user *)reg->addr);
3270                 break;
3271         case KVM_REG_S390_GBEA:
3272                 r = get_user(vcpu->arch.sie_block->gbea,
3273                              (u64 __user *)reg->addr);
3274                 break;
3275         default:
3276                 break;
3277         }
3278
3279         return r;
3280 }
3281
3282 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3283 {
3284         kvm_s390_vcpu_initial_reset(vcpu);
3285         return 0;
3286 }
3287
3288 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3289 {
3290         vcpu_load(vcpu);
3291         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3292         vcpu_put(vcpu);
3293         return 0;
3294 }
3295
3296 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3297 {
3298         vcpu_load(vcpu);
3299         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3300         vcpu_put(vcpu);
3301         return 0;
3302 }
3303
3304 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3305                                   struct kvm_sregs *sregs)
3306 {
3307         vcpu_load(vcpu);
3308
3309         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3310         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3311
3312         vcpu_put(vcpu);
3313         return 0;
3314 }
3315
3316 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3317                                   struct kvm_sregs *sregs)
3318 {
3319         vcpu_load(vcpu);
3320
3321         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3322         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3323
3324         vcpu_put(vcpu);
3325         return 0;
3326 }
3327
3328 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3329 {
3330         int ret = 0;
3331
3332         vcpu_load(vcpu);
3333
3334         if (test_fp_ctl(fpu->fpc)) {
3335                 ret = -EINVAL;
3336                 goto out;
3337         }
3338         vcpu->run->s.regs.fpc = fpu->fpc;
3339         if (MACHINE_HAS_VX)
3340                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3341                                  (freg_t *) fpu->fprs);
3342         else
3343                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3344
3345 out:
3346         vcpu_put(vcpu);
3347         return ret;
3348 }
3349
3350 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3351 {
3352         vcpu_load(vcpu);
3353
3354         /* make sure we have the latest values */
3355         save_fpu_regs();
3356         if (MACHINE_HAS_VX)
3357                 convert_vx_to_fp((freg_t *) fpu->fprs,
3358                                  (__vector128 *) vcpu->run->s.regs.vrs);
3359         else
3360                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3361         fpu->fpc = vcpu->run->s.regs.fpc;
3362
3363         vcpu_put(vcpu);
3364         return 0;
3365 }
3366
3367 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3368 {
3369         int rc = 0;
3370
3371         if (!is_vcpu_stopped(vcpu))
3372                 rc = -EBUSY;
3373         else {
3374                 vcpu->run->psw_mask = psw.mask;
3375                 vcpu->run->psw_addr = psw.addr;
3376         }
3377         return rc;
3378 }
3379
3380 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3381                                   struct kvm_translation *tr)
3382 {
3383         return -EINVAL; /* not implemented yet */
3384 }
3385
3386 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3387                               KVM_GUESTDBG_USE_HW_BP | \
3388                               KVM_GUESTDBG_ENABLE)
3389
3390 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3391                                         struct kvm_guest_debug *dbg)
3392 {
3393         int rc = 0;
3394
3395         vcpu_load(vcpu);
3396
3397         vcpu->guest_debug = 0;
3398         kvm_s390_clear_bp_data(vcpu);
3399
3400         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3401                 rc = -EINVAL;
3402                 goto out;
3403         }
3404         if (!sclp.has_gpere) {
3405                 rc = -EINVAL;
3406                 goto out;
3407         }
3408
3409         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3410                 vcpu->guest_debug = dbg->control;
3411                 /* enforce guest PER */
3412                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3413
3414                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3415                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3416         } else {
3417                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3418                 vcpu->arch.guestdbg.last_bp = 0;
3419         }
3420
3421         if (rc) {
3422                 vcpu->guest_debug = 0;
3423                 kvm_s390_clear_bp_data(vcpu);
3424                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3425         }
3426
3427 out:
3428         vcpu_put(vcpu);
3429         return rc;
3430 }
3431
3432 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3433                                     struct kvm_mp_state *mp_state)
3434 {
3435         int ret;
3436
3437         vcpu_load(vcpu);
3438
3439         /* CHECK_STOP and LOAD are not supported yet */
3440         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3441                                       KVM_MP_STATE_OPERATING;
3442
3443         vcpu_put(vcpu);
3444         return ret;
3445 }
3446
3447 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3448                                     struct kvm_mp_state *mp_state)
3449 {
3450         int rc = 0;
3451
3452         vcpu_load(vcpu);
3453
3454         /* user space knows about this interface - let it control the state */
3455         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3456
3457         switch (mp_state->mp_state) {
3458         case KVM_MP_STATE_STOPPED:
3459                 kvm_s390_vcpu_stop(vcpu);
3460                 break;
3461         case KVM_MP_STATE_OPERATING:
3462                 kvm_s390_vcpu_start(vcpu);
3463                 break;
3464         case KVM_MP_STATE_LOAD:
3465         case KVM_MP_STATE_CHECK_STOP:
3466                 /* fall through - CHECK_STOP and LOAD are not supported yet */
3467         default:
3468                 rc = -ENXIO;
3469         }
3470
3471         vcpu_put(vcpu);
3472         return rc;
3473 }
3474
3475 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3476 {
3477         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3478 }
3479
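/*
 * Process all pending vcpu requests before (re)entering SIE: re-arm the
 * ipte notifier for the prefix pages, flush the TLB, toggle IBS, and
 * disable/re-enable CMM interpretation around live migration.
 */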
3480 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3481 {
3482 retry:
3483         kvm_s390_vcpu_request_handled(vcpu);
3484         if (!kvm_request_pending(vcpu))
3485                 return 0;
3486         /*
3487          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3488          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3489          * This ensures that the ipte instruction for this request has
3490          * already finished. We might race against a second unmapper that
3491          * wants to set the blocking bit. Let's just retry the request loop.
3492          */
3493         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3494                 int rc;
3495                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3496                                           kvm_s390_get_prefix(vcpu),
3497                                           PAGE_SIZE * 2, PROT_WRITE);
3498                 if (rc) {
3499                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3500                         return rc;
3501                 }
3502                 goto retry;
3503         }
3504
3505         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3506                 vcpu->arch.sie_block->ihcpu = 0xffff;
3507                 goto retry;
3508         }
3509
3510         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3511                 if (!ibs_enabled(vcpu)) {
3512                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3513                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3514                 }
3515                 goto retry;
3516         }
3517
3518         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3519                 if (ibs_enabled(vcpu)) {
3520                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3521                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3522                 }
3523                 goto retry;
3524         }
3525
3526         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3527                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3528                 goto retry;
3529         }
3530
3531         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3532                 /*
3533                  * Disable CMM virtualization; we will emulate the ESSA
3534                  * instruction manually, in order to provide additional
3535                  * functionalities needed for live migration.
3536                  */
3537                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3538                 goto retry;
3539         }
3540
3541         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3542                 /*
3543                  * Re-enable CMM virtualization if CMMA is available and
3544                  * CMM has been used.
3545                  */
3546                 if ((vcpu->kvm->arch.use_cmma) &&
3547                     (vcpu->kvm->mm->context.uses_cmm))
3548                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3549                 goto retry;
3550         }
3551
3552         /* nothing to do, just clear the request */
3553         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3554         /* we left the vsie handler, nothing to do, just clear the request */
3555         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3556
3557         return 0;
3558 }
3559
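/*
 * Set the guest TOD clock: recompute the epoch (and, with the
 * multiple-epoch facility, the epoch index) relative to the current host
 * TOD and propagate it to all vcpus while they are blocked.
 */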
3560 void kvm_s390_set_tod_clock(struct kvm *kvm,
3561                             const struct kvm_s390_vm_tod_clock *gtod)
3562 {
3563         struct kvm_vcpu *vcpu;
3564         struct kvm_s390_tod_clock_ext htod;
3565         int i;
3566
3567         mutex_lock(&kvm->lock);
3568         preempt_disable();
3569
3570         get_tod_clock_ext((char *)&htod);
3571
3572         kvm->arch.epoch = gtod->tod - htod.tod;
3573         kvm->arch.epdx = 0;
3574         if (test_kvm_facility(kvm, 139)) {
3575                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3576                 if (kvm->arch.epoch > gtod->tod)
3577                         kvm->arch.epdx -= 1;
3578         }
3579
3580         kvm_s390_vcpu_block_all(kvm);
3581         kvm_for_each_vcpu(i, vcpu, kvm) {
3582                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3583                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3584         }
3585
3586         kvm_s390_vcpu_unblock_all(kvm);
3587         preempt_enable();
3588         mutex_unlock(&kvm->lock);
3589 }
3590
3591 /**
3592  * kvm_arch_fault_in_page - fault-in guest page if necessary
3593  * @vcpu: The corresponding virtual cpu
3594  * @gpa: Guest physical address
3595  * @writable: Whether the page should be writable or not
3596  *
3597  * Make sure that a guest page has been faulted-in on the host.
3598  *
3599  * Return: Zero on success, negative error code otherwise.
3600  */
3601 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3602 {
3603         return gmap_fault(vcpu->arch.gmap, gpa,
3604                           writable ? FAULT_FLAG_WRITE : 0);
3605 }
3606
3607 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3608                                       unsigned long token)
3609 {
3610         struct kvm_s390_interrupt inti;
3611         struct kvm_s390_irq irq;
3612
3613         if (start_token) {
3614                 irq.u.ext.ext_params2 = token;
3615                 irq.type = KVM_S390_INT_PFAULT_INIT;
3616                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3617         } else {
3618                 inti.type = KVM_S390_INT_PFAULT_DONE;
3619                 inti.parm64 = token;
3620                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3621         }
3622 }
3623
3624 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3625                                      struct kvm_async_pf *work)
3626 {
3627         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3628         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3629 }
3630
3631 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3632                                  struct kvm_async_pf *work)
3633 {
3634         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3635         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3636 }
3637
3638 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3639                                struct kvm_async_pf *work)
3640 {
3641         /* s390 will always inject the page directly */
3642 }
3643
3644 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3645 {
3646         /*
3647          * s390 will always inject the page directly,
3648          * but we still want check_async_completion to clean up
3649          */
3650         return true;
3651 }
3652
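/*
 * Try to convert the current gmap fault into an asynchronous pfault. This
 * requires a valid pfault token, a PSW matching the configured
 * select/compare masks, enabled external interrupts and service-signal
 * subclass, no pending interrupt and pfault enabled on the gmap. A return
 * value of 0 means the fault has to be resolved synchronously by the
 * caller.
 */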
3653 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3654 {
3655         hva_t hva;
3656         struct kvm_arch_async_pf arch;
3657         int rc;
3658
3659         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3660                 return 0;
3661         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3662             vcpu->arch.pfault_compare)
3663                 return 0;
3664         if (psw_extint_disabled(vcpu))
3665                 return 0;
3666         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3667                 return 0;
3668         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3669                 return 0;
3670         if (!vcpu->arch.gmap->pfault_enabled)
3671                 return 0;
3672
3673         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3674         hva += current->thread.gmap_addr & ~PAGE_MASK;
3675         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3676                 return 0;
3677
3678         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3679         return rc;
3680 }
3681
3682 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3683 {
3684         int rc, cpuflags;
3685
3686         /*
3687          * On s390 notifications for arriving pages will be delivered directly
3688          * to the guest, but the housekeeping for completed pfaults is
3689          * handled outside the worker.
3690          */
3691         kvm_check_async_pf_completion(vcpu);
3692
3693         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3694         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3695
3696         if (need_resched())
3697                 schedule();
3698
3699         if (test_cpu_flag(CIF_MCCK_PENDING))
3700                 s390_handle_mcck();
3701
3702         if (!kvm_is_ucontrol(vcpu->kvm)) {
3703                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3704                 if (rc)
3705                         return rc;
3706         }
3707
3708         rc = kvm_s390_handle_requests(vcpu);
3709         if (rc)
3710                 return rc;
3711
3712         if (guestdbg_enabled(vcpu)) {
3713                 kvm_s390_backup_guest_per_regs(vcpu);
3714                 kvm_s390_patch_guest_per_regs(vcpu);
3715         }
3716
3717         clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
3718
3719         vcpu->arch.sie_block->icptcode = 0;
3720         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3721         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3722         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3723
3724         return 0;
3725 }
3726
3727 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3728 {
3729         struct kvm_s390_pgm_info pgm_info = {
3730                 .code = PGM_ADDRESSING,
3731         };
3732         u8 opcode, ilen;
3733         int rc;
3734
3735         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3736         trace_kvm_s390_sie_fault(vcpu);
3737
3738         /*
3739          * We want to inject an addressing exception, which is defined as a
3740          * suppressing or terminating exception. However, since we came here
3741          * by a DAT access exception, the PSW still points to the faulting
3742          * instruction since DAT exceptions are nullifying. So we've got
3743          * to look up the current opcode to get the length of the instruction
3744          * to be able to forward the PSW.
3745          */
3746         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3747         ilen = insn_length(opcode);
3748         if (rc < 0) {
3749                 return rc;
3750         } else if (rc) {
3751                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3752                  * Forward by arbitrary ilc, injection will take care of
3753                  * nullification if necessary.
3754                  */
3755                 pgm_info = vcpu->arch.pgm;
3756                 ilen = 4;
3757         }
3758         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3759         kvm_s390_forward_psw(vcpu, ilen);
3760         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3761 }
3762
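/*
 * Handle the result of a SIE exit: reinject machine checks that were
 * delivered while in SIE, dispatch intercepts to the intercept handlers,
 * forward unhandled intercepts and ucontrol translation faults to
 * userspace (-EREMOTE), and resolve guest page faults, asynchronously
 * where possible.
 */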
3763 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3764 {
3765         struct mcck_volatile_info *mcck_info;
3766         struct sie_page *sie_page;
3767
3768         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3769                    vcpu->arch.sie_block->icptcode);
3770         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3771
3772         if (guestdbg_enabled(vcpu))
3773                 kvm_s390_restore_guest_per_regs(vcpu);
3774
3775         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3776         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3777
3778         if (exit_reason == -EINTR) {
3779                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3780                 sie_page = container_of(vcpu->arch.sie_block,
3781                                         struct sie_page, sie_block);
3782                 mcck_info = &sie_page->mcck_info;
3783                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3784                 return 0;
3785         }
3786
3787         if (vcpu->arch.sie_block->icptcode > 0) {
3788                 int rc = kvm_handle_sie_intercept(vcpu);
3789
3790                 if (rc != -EOPNOTSUPP)
3791                         return rc;
3792                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3793                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3794                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3795                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3796                 return -EREMOTE;
3797         } else if (exit_reason != -EFAULT) {
3798                 vcpu->stat.exit_null++;
3799                 return 0;
3800         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3801                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3802                 vcpu->run->s390_ucontrol.trans_exc_code =
3803                                                 current->thread.gmap_addr;
3804                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3805                 return -EREMOTE;
3806         } else if (current->thread.gmap_pfault) {
3807                 trace_kvm_s390_major_guest_pfault(vcpu);
3808                 current->thread.gmap_pfault = 0;
3809                 if (kvm_arch_setup_async_pf(vcpu))
3810                         return 0;
3811                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3812         }
3813         return vcpu_post_run_fault_in_sie(vcpu);
3814 }
3815
3816 static int __vcpu_run(struct kvm_vcpu *vcpu)
3817 {
3818         int rc, exit_reason;
3819
3820         /*
3821          * We try to hold kvm->srcu during most of vcpu_run (except when run-
3822          * ning the guest), so that memslots (and other stuff) are protected
3823          */
3824         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3825
3826         do {
3827                 rc = vcpu_pre_run(vcpu);
3828                 if (rc)
3829                         break;
3830
3831                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3832                 /*
3833                  * As PF_VCPU will be used in the fault handler, there must be
3834                  * no uaccess between guest_enter and guest_exit.
3835                  */
3836                 local_irq_disable();
3837                 guest_enter_irqoff();
3838                 __disable_cpu_timer_accounting(vcpu);
3839                 local_irq_enable();
3840                 exit_reason = sie64a(vcpu->arch.sie_block,
3841                                      vcpu->run->s.regs.gprs);
3842                 local_irq_disable();
3843                 __enable_cpu_timer_accounting(vcpu);
3844                 guest_exit_irqoff();
3845                 local_irq_enable();
3846                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3847
3848                 rc = vcpu_post_run(vcpu, exit_reason);
3849         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3850
3851         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3852         return rc;
3853 }
3854
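/*
 * Transfer the register state that userspace marked dirty in kvm_run into
 * the vcpu and its SIE block, lazily enable RI/GS when userspace provides
 * valid control blocks, and switch the access, floating point/vector and
 * guarded storage registers from the host to the guest context.
 */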
3855 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3856 {
3857         struct runtime_instr_cb *riccb;
3858         struct gs_cb *gscb;
3859
3860         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3861         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3862         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3863         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3864         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3865                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3866         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3867                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3868                 /* some control register changes require a tlb flush */
3869                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3870         }
3871         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3872                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3873                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3874                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3875                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3876                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3877         }
3878         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3879                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3880                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3881                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3882                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3883                         kvm_clear_async_pf_completion_queue(vcpu);
3884         }
3885         /*
3886          * If userspace sets the riccb (e.g. after migration) to a valid state,
3887          * we should enable RI here instead of doing the lazy enablement.
3888          */
3889         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3890             test_kvm_facility(vcpu->kvm, 64) &&
3891             riccb->v &&
3892             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3893                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3894                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3895         }
3896         /*
3897          * If userspace sets the gscb (e.g. after migration) to non-zero,
3898          * we should enable GS here instead of doing the lazy enablement.
3899          */
3900         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3901             test_kvm_facility(vcpu->kvm, 133) &&
3902             gscb->gssm &&
3903             !vcpu->arch.gs_enabled) {
3904                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3905                 vcpu->arch.sie_block->ecb |= ECB_GS;
3906                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3907                 vcpu->arch.gs_enabled = 1;
3908         }
3909         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3910             test_kvm_facility(vcpu->kvm, 82)) {
3911                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3912                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3913         }
3914         save_access_regs(vcpu->arch.host_acrs);
3915         restore_access_regs(vcpu->run->s.regs.acrs);
3916         /* save host (userspace) fprs/vrs */
3917         save_fpu_regs();
3918         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3919         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3920         if (MACHINE_HAS_VX)
3921                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3922         else
3923                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3924         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3925         if (test_fp_ctl(current->thread.fpu.fpc))
3926                 /* User space provided an invalid FPC, let's clear it */
3927                 current->thread.fpu.fpc = 0;
3928         if (MACHINE_HAS_GS) {
3929                 preempt_disable();
3930                 __ctl_set_bit(2, 4);
3931                 if (current->thread.gs_cb) {
3932                         vcpu->arch.host_gscb = current->thread.gs_cb;
3933                         save_gs_cb(vcpu->arch.host_gscb);
3934                 }
3935                 if (vcpu->arch.gs_enabled) {
3936                         current->thread.gs_cb = (struct gs_cb *)
3937                                                 &vcpu->run->s.regs.gscb;
3938                         restore_gs_cb(current->thread.gs_cb);
3939                 }
3940                 preempt_enable();
3941         }
3942         /* SIE will load etoken directly from SDNX and therefore kvm_run */
3943
3944         kvm_run->kvm_dirty_regs = 0;
3945 }
3946
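/*
 * Counterpart of sync_regs(): copy the guest register state back into
 * kvm_run for userspace and restore the host access, floating point/vector
 * and guarded storage register context.
 */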
3947 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3948 {
3949         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3950         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3951         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3952         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3953         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3954         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3955         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3956         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3957         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3958         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3959         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3960         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3961         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3962         save_access_regs(vcpu->run->s.regs.acrs);
3963         restore_access_regs(vcpu->arch.host_acrs);
3964         /* Save guest register state */
3965         save_fpu_regs();
3966         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3967         /* Restore will be done lazily at return */
3968         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3969         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3970         if (MACHINE_HAS_GS) {
3971                 __ctl_set_bit(2, 4);
3972                 if (vcpu->arch.gs_enabled)
3973                         save_gs_cb(current->thread.gs_cb);
3974                 preempt_disable();
3975                 current->thread.gs_cb = vcpu->arch.host_gscb;
3976                 restore_gs_cb(vcpu->arch.host_gscb);
3977                 preempt_enable();
3978                 if (!vcpu->arch.host_gscb)
3979                         __ctl_clear_bit(2, 4);
3980                 vcpu->arch.host_gscb = NULL;
3981         }
3982         /* SIE will save etoken directly into SDNX and therefore kvm_run */
3983 }
3984
3985 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3986 {
3987         int rc;
3988
3989         if (kvm_run->immediate_exit)
3990                 return -EINTR;
3991
3992         if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
3993             kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
3994                 return -EINVAL;
3995
3996         vcpu_load(vcpu);
3997
3998         if (guestdbg_exit_pending(vcpu)) {
3999                 kvm_s390_prepare_debug_exit(vcpu);
4000                 rc = 0;
4001                 goto out;
4002         }
4003
4004         kvm_sigset_activate(vcpu);
4005
4006         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4007                 kvm_s390_vcpu_start(vcpu);
4008         } else if (is_vcpu_stopped(vcpu)) {
4009                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4010                                    vcpu->vcpu_id);
4011                 rc = -EINVAL;
4012                 goto out;
4013         }
4014
4015         sync_regs(vcpu, kvm_run);
4016         enable_cpu_timer_accounting(vcpu);
4017
4018         might_fault();
4019         rc = __vcpu_run(vcpu);
4020
4021         if (signal_pending(current) && !rc) {
4022                 kvm_run->exit_reason = KVM_EXIT_INTR;
4023                 rc = -EINTR;
4024         }
4025
4026         if (guestdbg_exit_pending(vcpu) && !rc)  {
4027                 kvm_s390_prepare_debug_exit(vcpu);
4028                 rc = 0;
4029         }
4030
4031         if (rc == -EREMOTE) {
4032                 /* userspace support is needed, kvm_run has been prepared */
4033                 rc = 0;
4034         }
4035
4036         disable_cpu_timer_accounting(vcpu);
4037         store_regs(vcpu, kvm_run);
4038
4039         kvm_sigset_deactivate(vcpu);
4040
4041         vcpu->stat.exit_userspace++;
4042 out:
4043         vcpu_put(vcpu);
4044         return rc;
4045 }
4046
4047 /*
4048  * store status at address
4049  * we have two special cases:
4050  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4051  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4052  */
4053 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4054 {
4055         unsigned char archmode = 1;
4056         freg_t fprs[NUM_FPRS];
4057         unsigned int px;
4058         u64 clkcomp, cputm;
4059         int rc;
4060
4061         px = kvm_s390_get_prefix(vcpu);
4062         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4063                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4064                         return -EFAULT;
4065                 gpa = 0;
4066         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4067                 if (write_guest_real(vcpu, 163, &archmode, 1))
4068                         return -EFAULT;
4069                 gpa = px;
4070         } else
4071                 gpa -= __LC_FPREGS_SAVE_AREA;
4072
4073         /* manually convert vector registers if necessary */
4074         if (MACHINE_HAS_VX) {
4075                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4076                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4077                                      fprs, 128);
4078         } else {
4079                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4080                                      vcpu->run->s.regs.fprs, 128);
4081         }
4082         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4083                               vcpu->run->s.regs.gprs, 128);
4084         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4085                               &vcpu->arch.sie_block->gpsw, 16);
4086         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4087                               &px, 4);
4088         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4089                               &vcpu->run->s.regs.fpc, 4);
4090         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4091                               &vcpu->arch.sie_block->todpr, 4);
4092         cputm = kvm_s390_get_cpu_timer(vcpu);
4093         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4094                               &cputm, 8);
4095         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4096         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4097                               &clkcomp, 8);
4098         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4099                               &vcpu->run->s.regs.acrs, 64);
4100         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4101                               &vcpu->arch.sie_block->gcr, 128);
4102         return rc ? -EFAULT : 0;
4103 }
4104
4105 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4106 {
4107         /*
4108          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4109          * switch in the run ioctl. Let's update our copies before we save
4110          * them into the save area.
4111          */
4112         save_fpu_regs();
4113         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4114         save_access_regs(vcpu->run->s.regs.acrs);
4115
4116         return kvm_s390_store_status_unloaded(vcpu, addr);
4117 }
4118
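/*
 * Helpers for toggling IBS on a VCPU. They clear a possibly still pending
 * request for the opposite state and queue a synchronous request, so the
 * change takes effect before the VCPU next enters the SIE. IBS is only
 * worth enabling while a single VCPU is running (see kvm_s390_vcpu_start()
 * and kvm_s390_vcpu_stop() below).
 */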
4119 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4120 {
4121         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4122         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4123 }
4124
4125 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4126 {
4127         unsigned int i;
4128         struct kvm_vcpu *vcpu;
4129
4130         kvm_for_each_vcpu(i, vcpu, kvm) {
4131                 __disable_ibs_on_vcpu(vcpu);
4132         }
4133 }
4134
4135 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4136 {
4137         if (!sclp.has_ibs)
4138                 return;
4139         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4140         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4141 }
4142
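/*
 * Move a VCPU from the STOPPED to the OPERATING state. If it becomes the
 * only running VCPU, IBS is enabled for it; if it is the second VCPU to
 * start, IBS is disabled on all VCPUs again.
 */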
4143 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4144 {
4145         int i, online_vcpus, started_vcpus = 0;
4146
4147         if (!is_vcpu_stopped(vcpu))
4148                 return;
4149
4150         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4151         /* Only one cpu at a time may enter/leave the STOPPED state. */
4152         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4153         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4154
4155         for (i = 0; i < online_vcpus; i++) {
4156                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4157                         started_vcpus++;
4158         }
4159
4160         if (started_vcpus == 0) {
4161                 /* we're the only active VCPU -> speed it up */
4162                 __enable_ibs_on_vcpu(vcpu);
4163         } else if (started_vcpus == 1) {
4164                 /*
4165                  * As we are starting a second VCPU, we have to disable
4166                  * the IBS facility on all VCPUs to remove potentially
4167                  * outstanding ENABLE requests.
4168                  */
4169                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4170         }
4171
4172         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4173         /*
4174          * Another VCPU might have used IBS while we were offline.
4175          * Let's play safe and flush the VCPU at startup.
4176          */
4177         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4178         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4179         return;
4180 }
4181
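/*
 * Move a VCPU into the STOPPED state. Pending SIGP STOP (AND STORE STATUS)
 * orders are considered fully processed at this point, and if exactly one
 * running VCPU remains, IBS is enabled for it.
 */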
4182 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4183 {
4184         int i, online_vcpus, started_vcpus = 0;
4185         struct kvm_vcpu *started_vcpu = NULL;
4186
4187         if (is_vcpu_stopped(vcpu))
4188                 return;
4189
4190         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4191         /* Only one cpu at a time may enter/leave the STOPPED state. */
4192         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4193         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4194
4195         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4196         kvm_s390_clear_stop_irq(vcpu);
4197
4198         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4199         __disable_ibs_on_vcpu(vcpu);
4200
4201         for (i = 0; i < online_vcpus; i++) {
4202                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4203                         started_vcpus++;
4204                         started_vcpu = vcpu->kvm->vcpus[i];
4205                 }
4206         }
4207
4208         if (started_vcpus == 1) {
4209                 /*
4210                  * As we only have one VCPU left, we want to enable the
4211                  * IBS facility for that VCPU to speed it up.
4212                  */
4213                 __enable_ibs_on_vcpu(started_vcpu);
4214         }
4215
4216         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4217         return;
4218 }
4219
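/*
 * KVM_ENABLE_CAP on the vcpu fd. Only KVM_CAP_S390_CSS_SUPPORT is handled
 * here; it marks the VM as handling channel subsystem (I/O) instructions
 * in userspace.
 */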
4220 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4221                                      struct kvm_enable_cap *cap)
4222 {
4223         int r;
4224
4225         if (cap->flags)
4226                 return -EINVAL;
4227
4228         switch (cap->cap) {
4229         case KVM_CAP_S390_CSS_SUPPORT:
4230                 if (!vcpu->kvm->arch.css_support) {
4231                         vcpu->kvm->arch.css_support = 1;
4232                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4233                         trace_kvm_s390_enable_css(vcpu->kvm);
4234                 }
4235                 r = 0;
4236                 break;
4237         default:
4238                 r = -EINVAL;
4239                 break;
4240         }
4241         return r;
4242 }
4243
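/*
 * Handle the KVM_S390_MEM_OP vcpu ioctl: copy between a userspace buffer
 * and guest logical memory (optionally via an access register), or, with
 * KVM_S390_MEMOP_F_CHECK_ONLY, just verify that the access would succeed.
 * With KVM_S390_MEMOP_F_INJECT_EXCEPTION, an access error is additionally
 * injected into the guest as a program interruption.
 *
 * Rough userspace usage (sketch; vcpu_fd, guest_addr, local_buf and len are
 * caller supplied, struct layout as in <linux/kvm.h>):
 *
 *	struct kvm_s390_mem_op mop = {
 *		.gaddr = guest_addr,
 *		.buf   = (__u64)(unsigned long)local_buf,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop) < 0)
 *		perror("KVM_S390_MEM_OP");
 */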
4244 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4245                                   struct kvm_s390_mem_op *mop)
4246 {
4247         void __user *uaddr = (void __user *)mop->buf;
4248         void *tmpbuf = NULL;
4249         int r, srcu_idx;
4250         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4251                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4252
4253         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4254                 return -EINVAL;
4255
4256         if (mop->size > MEM_OP_MAX_SIZE)
4257                 return -E2BIG;
4258
4259         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4260                 tmpbuf = vmalloc(mop->size);
4261                 if (!tmpbuf)
4262                         return -ENOMEM;
4263         }
4264
4265         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4266
4267         switch (mop->op) {
4268         case KVM_S390_MEMOP_LOGICAL_READ:
4269                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4270                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4271                                             mop->size, GACC_FETCH);
4272                         break;
4273                 }
4274                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4275                 if (r == 0) {
4276                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4277                                 r = -EFAULT;
4278                 }
4279                 break;
4280         case KVM_S390_MEMOP_LOGICAL_WRITE:
4281                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4282                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4283                                             mop->size, GACC_STORE);
4284                         break;
4285                 }
4286                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4287                         r = -EFAULT;
4288                         break;
4289                 }
4290                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4291                 break;
4292         default:
4293                 r = -EINVAL;
4294         }
4295
4296         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4297
4298         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4299                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4300
4301         vfree(tmpbuf);
4302         return r;
4303 }
4304
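/*
 * "Async" vcpu ioctls are dispatched by the generic KVM code without first
 * taking the vcpu mutex, so interrupt injection cannot be blocked by a VCPU
 * that is currently running in the SIE.
 */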
4305 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4306                                unsigned int ioctl, unsigned long arg)
4307 {
4308         struct kvm_vcpu *vcpu = filp->private_data;
4309         void __user *argp = (void __user *)arg;
4310
4311         switch (ioctl) {
4312         case KVM_S390_IRQ: {
4313                 struct kvm_s390_irq s390irq;
4314
4315                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4316                         return -EFAULT;
4317                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4318         }
4319         case KVM_S390_INTERRUPT: {
4320                 struct kvm_s390_interrupt s390int;
4321                 struct kvm_s390_irq s390irq = {};
4322
4323                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4324                         return -EFAULT;
4325                 if (s390int_to_s390irq(&s390int, &s390irq))
4326                         return -EINVAL;
4327                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4328         }
4329         }
4330         return -ENOIOCTLCMD;
4331 }
4332
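/*
 * All remaining vcpu ioctls. These run under the vcpu mutex (taken by the
 * generic KVM code) and with the VCPU loaded on the current CPU via
 * vcpu_load().
 */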
4333 long kvm_arch_vcpu_ioctl(struct file *filp,
4334                          unsigned int ioctl, unsigned long arg)
4335 {
4336         struct kvm_vcpu *vcpu = filp->private_data;
4337         void __user *argp = (void __user *)arg;
4338         int idx;
4339         long r;
4340
4341         vcpu_load(vcpu);
4342
4343         switch (ioctl) {
4344         case KVM_S390_STORE_STATUS:
4345                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4346                 r = kvm_s390_vcpu_store_status(vcpu, arg);
4347                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4348                 break;
4349         case KVM_S390_SET_INITIAL_PSW: {
4350                 psw_t psw;
4351
4352                 r = -EFAULT;
4353                 if (copy_from_user(&psw, argp, sizeof(psw)))
4354                         break;
4355                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4356                 break;
4357         }
4358         case KVM_S390_INITIAL_RESET:
4359                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4360                 break;
4361         case KVM_SET_ONE_REG:
4362         case KVM_GET_ONE_REG: {
4363                 struct kvm_one_reg reg;
4364                 r = -EFAULT;
4365                 if (copy_from_user(&reg, argp, sizeof(reg)))
4366                         break;
4367                 if (ioctl == KVM_SET_ONE_REG)
4368                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4369                 else
4370                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4371                 break;
4372         }
4373 #ifdef CONFIG_KVM_S390_UCONTROL
4374         case KVM_S390_UCAS_MAP: {
4375                 struct kvm_s390_ucas_mapping ucasmap;
4376
4377                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4378                         r = -EFAULT;
4379                         break;
4380                 }
4381
4382                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4383                         r = -EINVAL;
4384                         break;
4385                 }
4386
4387                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4388                                      ucasmap.vcpu_addr, ucasmap.length);
4389                 break;
4390         }
4391         case KVM_S390_UCAS_UNMAP: {
4392                 struct kvm_s390_ucas_mapping ucasmap;
4393
4394                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4395                         r = -EFAULT;
4396                         break;
4397                 }
4398
4399                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4400                         r = -EINVAL;
4401                         break;
4402                 }
4403
4404                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4405                         ucasmap.length);
4406                 break;
4407         }
4408 #endif
4409         case KVM_S390_VCPU_FAULT: {
4410                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4411                 break;
4412         }
4413         case KVM_ENABLE_CAP:
4414         {
4415                 struct kvm_enable_cap cap;
4416                 r = -EFAULT;
4417                 if (copy_from_user(&cap, argp, sizeof(cap)))
4418                         break;
4419                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4420                 break;
4421         }
4422         case KVM_S390_MEM_OP: {
4423                 struct kvm_s390_mem_op mem_op;
4424
4425                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4426                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4427                 else
4428                         r = -EFAULT;
4429                 break;
4430         }
4431         case KVM_S390_SET_IRQ_STATE: {
4432                 struct kvm_s390_irq_state irq_state;
4433
4434                 r = -EFAULT;
4435                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4436                         break;
4437                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4438                     irq_state.len == 0 ||
4439                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4440                         r = -EINVAL;
4441                         break;
4442                 }
4443                 /* do not use irq_state.flags, it will break old QEMUs */
4444                 r = kvm_s390_set_irq_state(vcpu,
4445                                            (void __user *) irq_state.buf,
4446                                            irq_state.len);
4447                 break;
4448         }
4449         case KVM_S390_GET_IRQ_STATE: {
4450                 struct kvm_s390_irq_state irq_state;
4451
4452                 r = -EFAULT;
4453                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4454                         break;
4455                 if (irq_state.len == 0) {
4456                         r = -EINVAL;
4457                         break;
4458                 }
4459                 /* do not use irq_state.flags, it will break old QEMUs */
4460                 r = kvm_s390_get_irq_state(vcpu,
4461                                            (__u8 __user *)  irq_state.buf,
4462                                            irq_state.len);
4463                 break;
4464         }
4465         default:
4466                 r = -ENOTTY;
4467         }
4468
4469         vcpu_put(vcpu);
4470         return r;
4471 }
4472
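/*
 * Fault handler for mmap() on the vcpu fd. For user controlled VMs the page
 * at KVM_S390_SIE_PAGE_OFFSET maps the SIE control block; every other access
 * gets SIGBUS.
 */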
4473 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4474 {
4475 #ifdef CONFIG_KVM_S390_UCONTROL
4476         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4477                  && (kvm_is_ucontrol(vcpu->kvm))) {
4478                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4479                 get_page(vmf->page);
4480                 return 0;
4481         }
4482 #endif
4483         return VM_FAULT_SIGBUS;
4484 }
4485
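/* s390 attaches no arch specific data to memslots, so nothing to allocate. */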
4486 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4487                             unsigned long npages)
4488 {
4489         return 0;
4490 }
4491
4492 /* Section: memory related */
4493 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4494                                    struct kvm_memory_slot *memslot,
4495                                    const struct kvm_userspace_memory_region *mem,
4496                                    enum kvm_mr_change change)
4497 {
4498         /* A few sanity checks. Memory slots have to start and end on a
4499            segment boundary (1MB). The memory in userland may be fragmented
4500            across different vmas. It is okay to mmap() and munmap() within
4501            this slot at any time after this call. */
4502
4503         if (mem->userspace_addr & 0xffffful)
4504                 return -EINVAL;
4505
4506         if (mem->memory_size & 0xffffful)
4507                 return -EINVAL;
4508
4509         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4510                 return -EINVAL;
4511
4512         return 0;
4513 }
4514
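/*
 * Apply a memslot change to the guest address space: remove the gmap
 * segment mapping for deleted or moved slots and (re)establish it for
 * created or moved slots. Flag-only changes need no gmap update.
 */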
4515 void kvm_arch_commit_memory_region(struct kvm *kvm,
4516                                 const struct kvm_userspace_memory_region *mem,
4517                                 const struct kvm_memory_slot *old,
4518                                 const struct kvm_memory_slot *new,
4519                                 enum kvm_mr_change change)
4520 {
4521         int rc = 0;
4522
4523         switch (change) {
4524         case KVM_MR_DELETE:
4525                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4526                                         old->npages * PAGE_SIZE);
4527                 break;
4528         case KVM_MR_MOVE:
4529                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4530                                         old->npages * PAGE_SIZE);
4531                 if (rc)
4532                         break;
4533                 /* FALLTHROUGH */
4534         case KVM_MR_CREATE:
4535                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4536                                       mem->guest_phys_addr, mem->memory_size);
4537                 break;
4538         case KVM_MR_FLAGS_ONLY:
4539                 break;
4540         default:
4541                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
4542         }
4543         if (rc)
4544                 pr_warn("failed to commit memory region\n");
4545         return;
4546 }
4547
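/*
 * Build a mask limiting which bits of host facility-list word i may be
 * offered to guests, derived from sclp.hmfai (two bits per word); used
 * below to seed kvm_s390_fac_base from the host STFLE data.
 */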
4548 static inline unsigned long nonhyp_mask(int i)
4549 {
4550         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4551
4552         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4553 }
4554
4555 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4556 {
4557         vcpu->valid_wakeup = false;
4558 }
4559
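/*
 * Module initialization: refuse to load if the SIE (sief2) facility is not
 * available, reject the unsupported nested+hpage combination, derive the
 * base guest facility mask from the host facility list, and register with
 * the common KVM module code.
 */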
4560 static int __init kvm_s390_init(void)
4561 {
4562         int i;
4563
4564         if (!sclp.has_sief2) {
4565                 pr_info("SIE is not available\n");
4566                 return -ENODEV;
4567         }
4568
4569         if (nested && hpage) {
4570                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
4571                 return -EINVAL;
4572         }
4573
4574         for (i = 0; i < 16; i++)
4575                 kvm_s390_fac_base[i] |=
4576                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4577
4578         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4579 }
4580
4581 static void __exit kvm_s390_exit(void)
4582 {
4583         kvm_exit();
4584 }
4585
4586 module_init(kvm_s390_init);
4587 module_exit(kvm_s390_exit);
4588
4589 /*
4590  * Enable autoloading of the kvm module.
4591  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4592  * since x86 takes a different approach.
4593  */
4594 #include <linux/miscdevice.h>
4595 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4596 MODULE_ALIAS("devname:kvm");