1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include "kvm-s390.h"
48 #include "gaccess.h"
49
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53
54 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57                            (KVM_MAX_VCPUS + LOCAL_IRQS))
58
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61
62 struct kvm_stats_debugfs_item debugfs_entries[] = {
63         { "userspace_handled", VCPU_STAT(exit_userspace) },
64         { "exit_null", VCPU_STAT(exit_null) },
65         { "exit_validity", VCPU_STAT(exit_validity) },
66         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
67         { "exit_external_request", VCPU_STAT(exit_external_request) },
68         { "exit_io_request", VCPU_STAT(exit_io_request) },
69         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70         { "exit_instruction", VCPU_STAT(exit_instruction) },
71         { "exit_pei", VCPU_STAT(exit_pei) },
72         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78         { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
79         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84         { "deliver_ckc", VCPU_STAT(deliver_ckc) },
85         { "deliver_cputm", VCPU_STAT(deliver_cputm) },
86         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
87         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
88         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
89         { "deliver_virtio", VCPU_STAT(deliver_virtio) },
90         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
91         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
92         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
93         { "deliver_program", VCPU_STAT(deliver_program) },
94         { "deliver_io", VCPU_STAT(deliver_io) },
95         { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
96         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
97         { "inject_ckc", VCPU_STAT(inject_ckc) },
98         { "inject_cputm", VCPU_STAT(inject_cputm) },
99         { "inject_external_call", VCPU_STAT(inject_external_call) },
100         { "inject_float_mchk", VM_STAT(inject_float_mchk) },
101         { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
102         { "inject_io", VM_STAT(inject_io) },
103         { "inject_mchk", VCPU_STAT(inject_mchk) },
104         { "inject_pfault_done", VM_STAT(inject_pfault_done) },
105         { "inject_program", VCPU_STAT(inject_program) },
106         { "inject_restart", VCPU_STAT(inject_restart) },
107         { "inject_service_signal", VM_STAT(inject_service_signal) },
108         { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
109         { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
110         { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
111         { "inject_virtio", VM_STAT(inject_virtio) },
112         { "instruction_epsw", VCPU_STAT(instruction_epsw) },
113         { "instruction_gs", VCPU_STAT(instruction_gs) },
114         { "instruction_io_other", VCPU_STAT(instruction_io_other) },
115         { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
116         { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
117         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
118         { "instruction_ptff", VCPU_STAT(instruction_ptff) },
119         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
120         { "instruction_sck", VCPU_STAT(instruction_sck) },
121         { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
122         { "instruction_spx", VCPU_STAT(instruction_spx) },
123         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
124         { "instruction_stap", VCPU_STAT(instruction_stap) },
125         { "instruction_iske", VCPU_STAT(instruction_iske) },
126         { "instruction_ri", VCPU_STAT(instruction_ri) },
127         { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
128         { "instruction_sske", VCPU_STAT(instruction_sske) },
129         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
130         { "instruction_essa", VCPU_STAT(instruction_essa) },
131         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
132         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
133         { "instruction_tb", VCPU_STAT(instruction_tb) },
134         { "instruction_tpi", VCPU_STAT(instruction_tpi) },
135         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
136         { "instruction_tsch", VCPU_STAT(instruction_tsch) },
137         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
138         { "instruction_sie", VCPU_STAT(instruction_sie) },
139         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
140         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
141         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
142         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
143         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
144         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
145         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
146         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
147         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
148         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
149         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
150         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
151         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
152         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
153         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
154         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
155         { "instruction_diag_10", VCPU_STAT(diagnose_10) },
156         { "instruction_diag_44", VCPU_STAT(diagnose_44) },
157         { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
158         { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) },
159         { "instruction_diag_258", VCPU_STAT(diagnose_258) },
160         { "instruction_diag_308", VCPU_STAT(diagnose_308) },
161         { "instruction_diag_500", VCPU_STAT(diagnose_500) },
162         { "instruction_diag_other", VCPU_STAT(diagnose_other) },
163         { NULL }
164 };
165
166 struct kvm_s390_tod_clock_ext {
167         __u8 epoch_idx;
168         __u64 tod;
169         __u8 reserved[7];
170 } __packed;
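/*
 * With the multiple-epoch facility (facility 139) the guest TOD is
 * conceptually the pair (epoch_idx, tod): epoch_idx extends the 64-bit
 * TOD value at the high end, so carries out of "tod" accumulate in
 * epoch_idx instead of being lost (see kvm_clock_sync_scb() and
 * kvm_s390_get_tod_clock() below).
 */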
171
172 /* allow nested virtualization in KVM (if enabled by user space) */
173 static int nested;
174 module_param(nested, int, S_IRUGO);
175 MODULE_PARM_DESC(nested, "Nested virtualization support");
176
177 /* allow 1m huge page guest backing, if !nested */
178 static int hpage;
179 module_param(hpage, int, 0444);
180 MODULE_PARM_DESC(hpage, "1m huge page backing support");
181
182 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
183 static u8 halt_poll_max_steal = 10;
184 module_param(halt_poll_max_steal, byte, 0644);
185 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
186
187 /*
188  * For now we handle at most 16 double words as this is what the s390 base
189  * kernel handles and stores in the prefix page. If we ever need to go beyond
190  * this, the code needs to change, but the external uapi can stay the same.
191  */
192 #define SIZE_INTERNAL 16
193
194 /*
195  * Base feature mask that defines default mask for facilities. Consists of the
196  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
197  */
198 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
199 /*
200  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
201  * and defines the facilities that can be enabled via a cpu model.
202  */
203 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
204
205 static unsigned long kvm_s390_fac_size(void)
206 {
207         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
208         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
209         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
210                 sizeof(S390_lowcore.stfle_fac_list));
211
212         return SIZE_INTERNAL;
213 }
214
215 /* available cpu features supported by kvm */
216 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
217 /* available subfunctions indicated via query / "test bit" */
218 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
219
220 static struct gmap_notifier gmap_notifier;
221 static struct gmap_notifier vsie_gmap_notifier;
222 debug_info_t *kvm_s390_dbf;
223
224 /* Section: not file related */
225 int kvm_arch_hardware_enable(void)
226 {
227         /* every s390 is virtualization enabled ;-) */
228         return 0;
229 }
230
231 int kvm_arch_check_processor_compat(void)
232 {
233         return 0;
234 }
235
236 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
237                               unsigned long end);
238
239 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
240 {
241         u8 delta_idx = 0;
242
243         /*
244          * The TOD jumps by delta, we have to compensate this by adding
245          * -delta to the epoch.
246          */
247         delta = -delta;
248
249         /* sign-extension - we're adding to signed values below */
250         if ((s64)delta < 0)
251                 delta_idx = -1;
252
253         scb->epoch += delta;
254         if (scb->ecd & ECD_MEF) {
255                 scb->epdx += delta_idx;
256                 if (scb->epoch < delta)
257                         scb->epdx += 1;
258         }
259 }
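/*
 * Worked example for the carry handling above (illustrative numbers):
 * assume the TOD jumps forward by delta = 0x100 and the old epoch is
 * 0x200. Then -delta = 0xffffffffffffff00 and delta_idx = -1. The
 * 64-bit add wraps (0x200 + 0xffffffffffffff00 = 0x100, which is less
 * than the added value), so the carry re-increments epdx; the net effect
 * is epdx unchanged and epoch reduced by 0x100, i.e. a 128-bit
 * subtraction of delta from (epdx:epoch).
 */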
260
261 /*
262  * This callback is executed during stop_machine(). All CPUs are therefore
263  * temporarily stopped. In order not to change guest behavior, we have to
264  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
265  * so a CPU won't be stopped while calculating with the epoch.
266  */
267 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
268                           void *v)
269 {
270         struct kvm *kvm;
271         struct kvm_vcpu *vcpu;
272         int i;
273         unsigned long long *delta = v;
274
275         list_for_each_entry(kvm, &vm_list, vm_list) {
276                 kvm_for_each_vcpu(i, vcpu, kvm) {
277                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
278                         if (i == 0) {
279                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
280                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
281                         }
282                         if (vcpu->arch.cputm_enabled)
283                                 vcpu->arch.cputm_start += *delta;
284                         if (vcpu->arch.vsie_block)
285                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
286                                                    *delta);
287                 }
288         }
289         return NOTIFY_OK;
290 }
291
292 static struct notifier_block kvm_clock_notifier = {
293         .notifier_call = kvm_clock_sync,
294 };
295
296 int kvm_arch_hardware_setup(void)
297 {
298         gmap_notifier.notifier_call = kvm_gmap_notifier;
299         gmap_register_pte_notifier(&gmap_notifier);
300         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
301         gmap_register_pte_notifier(&vsie_gmap_notifier);
302         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
303                                        &kvm_clock_notifier);
304         return 0;
305 }
306
307 void kvm_arch_hardware_unsetup(void)
308 {
309         gmap_unregister_pte_notifier(&gmap_notifier);
310         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
311         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
312                                          &kvm_clock_notifier);
313 }
314
315 static void allow_cpu_feat(unsigned long nr)
316 {
317         set_bit_inv(nr, kvm_s390_available_cpu_feat);
318 }
319
320 static inline int plo_test_bit(unsigned char nr)
321 {
322         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
323         int cc;
324
325         asm volatile(
326                 /* Parameter registers are ignored for "test bit" */
327                 "       plo     0,0,0,0(0)\n"
328                 "       ipm     %0\n"
329                 "       srl     %0,28\n"
330                 : "=d" (cc)
331                 : "d" (r0)
332                 : "cc");
333         return cc == 0;
334 }
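/*
 * plo_test_bit() uses the test form of PERFORM LOCKED OPERATION:
 * or'ing 0x100 into the function code in GR0 asks whether function "nr"
 * is installed instead of executing it, and condition code 0 means it
 * is. kvm_s390_cpu_feat_init() below probes all 256 function codes this
 * way.
 */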
335
336 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
337 {
338         register unsigned long r0 asm("0") = 0; /* query function */
339         register unsigned long r1 asm("1") = (unsigned long) query;
340
341         asm volatile(
342                 /* Parameter regs are ignored */
343                 "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
344                 :
345                 : "d" (r0), "a" (r1), [opc] "i" (opcode)
346                 : "cc", "memory");
347 }
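/*
 * __insn32_query() issues the given instruction (SORTL or DFLTCC below)
 * with function code 0 in GR0, i.e. its query function, so that the
 * instruction stores its installed-subfunctions indication at the
 * parameter block address held in GR1 instead of doing any real work.
 */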
348
349 #define INSN_SORTL 0xb938
350 #define INSN_DFLTCC 0xb939
351
352 static void kvm_s390_cpu_feat_init(void)
353 {
354         int i;
355
356         for (i = 0; i < 256; ++i) {
357                 if (plo_test_bit(i))
358                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
359         }
360
361         if (test_facility(28)) /* TOD-clock steering */
362                 ptff(kvm_s390_available_subfunc.ptff,
363                      sizeof(kvm_s390_available_subfunc.ptff),
364                      PTFF_QAF);
365
366         if (test_facility(17)) { /* MSA */
367                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
368                               kvm_s390_available_subfunc.kmac);
369                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
370                               kvm_s390_available_subfunc.kmc);
371                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
372                               kvm_s390_available_subfunc.km);
373                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
374                               kvm_s390_available_subfunc.kimd);
375                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
376                               kvm_s390_available_subfunc.klmd);
377         }
378         if (test_facility(76)) /* MSA3 */
379                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
380                               kvm_s390_available_subfunc.pckmo);
381         if (test_facility(77)) { /* MSA4 */
382                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
383                               kvm_s390_available_subfunc.kmctr);
384                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
385                               kvm_s390_available_subfunc.kmf);
386                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
387                               kvm_s390_available_subfunc.kmo);
388                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
389                               kvm_s390_available_subfunc.pcc);
390         }
391         if (test_facility(57)) /* MSA5 */
392                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
393                               kvm_s390_available_subfunc.ppno);
394
395         if (test_facility(146)) /* MSA8 */
396                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
397                               kvm_s390_available_subfunc.kma);
398
399         if (test_facility(155)) /* MSA9 */
400                 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
401                               kvm_s390_available_subfunc.kdsa);
402
403         if (test_facility(150)) /* SORTL */
404                 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
405
406         if (test_facility(151)) /* DFLTCC */
407                 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
408
409         if (MACHINE_HAS_ESOP)
410                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
411         /*
412          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
413          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
414          */
415         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
416             !test_facility(3) || !nested)
417                 return;
418         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
419         if (sclp.has_64bscao)
420                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
421         if (sclp.has_siif)
422                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
423         if (sclp.has_gpere)
424                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
425         if (sclp.has_gsls)
426                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
427         if (sclp.has_ib)
428                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
429         if (sclp.has_cei)
430                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
431         if (sclp.has_ibs)
432                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
433         if (sclp.has_kss)
434                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
435         /*
436          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
437          * all skey handling functions read/set the skey from the PGSTE
438          * instead of the real storage key.
439          *
440          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
441          * pages to be detected as preserved although they are resident.
442          *
443          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
444          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
445          *
446          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
447          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
448          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
449          *
450          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
451          * cannot easily shadow the SCA because of the ipte lock.
452          */
453 }
454
455 int kvm_arch_init(void *opaque)
456 {
457         int rc = -ENOMEM;
458
459         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
460         if (!kvm_s390_dbf)
461                 return -ENOMEM;
462
463         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view))
464                 goto out;
465
466         kvm_s390_cpu_feat_init();
467
468         /* Register floating interrupt controller interface. */
469         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
470         if (rc) {
471                 pr_err("A FLIC registration call failed with rc=%d\n", rc);
472                 goto out;
473         }
474
475         rc = kvm_s390_gib_init(GAL_ISC);
476         if (rc)
477                 goto out;
478
479         return 0;
480
481 out:
482         kvm_arch_exit();
483         return rc;
484 }
485
486 void kvm_arch_exit(void)
487 {
488         kvm_s390_gib_destroy();
489         debug_unregister(kvm_s390_dbf);
490 }
491
492 /* Section: device related */
493 long kvm_arch_dev_ioctl(struct file *filp,
494                         unsigned int ioctl, unsigned long arg)
495 {
496         if (ioctl == KVM_S390_ENABLE_SIE)
497                 return s390_enable_sie();
498         return -EINVAL;
499 }
500
501 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
502 {
503         int r;
504
505         switch (ext) {
506         case KVM_CAP_S390_PSW:
507         case KVM_CAP_S390_GMAP:
508         case KVM_CAP_SYNC_MMU:
509 #ifdef CONFIG_KVM_S390_UCONTROL
510         case KVM_CAP_S390_UCONTROL:
511 #endif
512         case KVM_CAP_ASYNC_PF:
513         case KVM_CAP_SYNC_REGS:
514         case KVM_CAP_ONE_REG:
515         case KVM_CAP_ENABLE_CAP:
516         case KVM_CAP_S390_CSS_SUPPORT:
517         case KVM_CAP_IOEVENTFD:
518         case KVM_CAP_DEVICE_CTRL:
519         case KVM_CAP_S390_IRQCHIP:
520         case KVM_CAP_VM_ATTRIBUTES:
521         case KVM_CAP_MP_STATE:
522         case KVM_CAP_IMMEDIATE_EXIT:
523         case KVM_CAP_S390_INJECT_IRQ:
524         case KVM_CAP_S390_USER_SIGP:
525         case KVM_CAP_S390_USER_STSI:
526         case KVM_CAP_S390_SKEYS:
527         case KVM_CAP_S390_IRQ_STATE:
528         case KVM_CAP_S390_USER_INSTR0:
529         case KVM_CAP_S390_CMMA_MIGRATION:
530         case KVM_CAP_S390_AIS:
531         case KVM_CAP_S390_AIS_MIGRATION:
532                 r = 1;
533                 break;
534         case KVM_CAP_S390_HPAGE_1M:
535                 r = 0;
536                 if (hpage && !kvm_is_ucontrol(kvm))
537                         r = 1;
538                 break;
539         case KVM_CAP_S390_MEM_OP:
540                 r = MEM_OP_MAX_SIZE;
541                 break;
542         case KVM_CAP_NR_VCPUS:
543         case KVM_CAP_MAX_VCPUS:
544         case KVM_CAP_MAX_VCPU_ID:
545                 r = KVM_S390_BSCA_CPU_SLOTS;
546                 if (!kvm_s390_use_sca_entries())
547                         r = KVM_MAX_VCPUS;
548                 else if (sclp.has_esca && sclp.has_64bscao)
549                         r = KVM_S390_ESCA_CPU_SLOTS;
550                 break;
551         case KVM_CAP_S390_COW:
552                 r = MACHINE_HAS_ESOP;
553                 break;
554         case KVM_CAP_S390_VECTOR_REGISTERS:
555                 r = MACHINE_HAS_VX;
556                 break;
557         case KVM_CAP_S390_RI:
558                 r = test_facility(64);
559                 break;
560         case KVM_CAP_S390_GS:
561                 r = test_facility(133);
562                 break;
563         case KVM_CAP_S390_BPB:
564                 r = test_facility(82);
565                 break;
566         default:
567                 r = 0;
568         }
569         return r;
570 }
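/*
 * Hypothetical userspace sketch (the "vm_fd" name is illustrative): the
 * values returned above are consumed through KVM_CHECK_EXTENSION, e.g.
 *
 *	int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *
 * A return of 0 means the capability is absent; positive values carry
 * capability-specific meaning (maximum transfer size, vcpu limits, ...).
 */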
571
572 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
573                                     struct kvm_memory_slot *memslot)
574 {
575         int i;
576         gfn_t cur_gfn, last_gfn;
577         unsigned long gaddr, vmaddr;
578         struct gmap *gmap = kvm->arch.gmap;
579         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
580
581         /* Loop over all guest segments */
582         cur_gfn = memslot->base_gfn;
583         last_gfn = memslot->base_gfn + memslot->npages;
584         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
585                 gaddr = gfn_to_gpa(cur_gfn);
586                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
587                 if (kvm_is_error_hva(vmaddr))
588                         continue;
589
590                 bitmap_zero(bitmap, _PAGE_ENTRIES);
591                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
592                 for (i = 0; i < _PAGE_ENTRIES; i++) {
593                         if (test_bit(i, bitmap))
594                                 mark_page_dirty(kvm, cur_gfn + i);
595                 }
596
597                 if (fatal_signal_pending(current))
598                         return;
599                 cond_resched();
600         }
601 }
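/*
 * The loop above walks the memslot one segment at a time: _PAGE_ENTRIES
 * (256 on s390) 4 KiB pages, i.e. 1 MiB of guest memory per iteration.
 * gmap_sync_dirty_log_pmd() fills "bitmap" with the dirty state of those
 * pages, and every set bit is forwarded to the generic dirty log via
 * mark_page_dirty().
 */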
602
603 /* Section: vm related */
604 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
605
606 /*
607  * Get (and clear) the dirty memory log for a memory slot.
608  */
609 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
610                                struct kvm_dirty_log *log)
611 {
612         int r;
613         unsigned long n;
614         struct kvm_memslots *slots;
615         struct kvm_memory_slot *memslot;
616         int is_dirty = 0;
617
618         if (kvm_is_ucontrol(kvm))
619                 return -EINVAL;
620
621         mutex_lock(&kvm->slots_lock);
622
623         r = -EINVAL;
624         if (log->slot >= KVM_USER_MEM_SLOTS)
625                 goto out;
626
627         slots = kvm_memslots(kvm);
628         memslot = id_to_memslot(slots, log->slot);
629         r = -ENOENT;
630         if (!memslot->dirty_bitmap)
631                 goto out;
632
633         kvm_s390_sync_dirty_log(kvm, memslot);
634         r = kvm_get_dirty_log(kvm, log, &is_dirty);
635         if (r)
636                 goto out;
637
638         /* Clear the dirty log */
639         if (is_dirty) {
640                 n = kvm_dirty_bitmap_bytes(memslot);
641                 memset(memslot->dirty_bitmap, 0, n);
642         }
643         r = 0;
644 out:
645         mutex_unlock(&kvm->slots_lock);
646         return r;
647 }
648
649 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
650 {
651         unsigned int i;
652         struct kvm_vcpu *vcpu;
653
654         kvm_for_each_vcpu(i, vcpu, kvm) {
655                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
656         }
657 }
658
659 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
660 {
661         int r;
662
663         if (cap->flags)
664                 return -EINVAL;
665
666         switch (cap->cap) {
667         case KVM_CAP_S390_IRQCHIP:
668                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
669                 kvm->arch.use_irqchip = 1;
670                 r = 0;
671                 break;
672         case KVM_CAP_S390_USER_SIGP:
673                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
674                 kvm->arch.user_sigp = 1;
675                 r = 0;
676                 break;
677         case KVM_CAP_S390_VECTOR_REGISTERS:
678                 mutex_lock(&kvm->lock);
679                 if (kvm->created_vcpus) {
680                         r = -EBUSY;
681                 } else if (MACHINE_HAS_VX) {
682                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
683                         set_kvm_facility(kvm->arch.model.fac_list, 129);
684                         if (test_facility(134)) {
685                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
686                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
687                         }
688                         if (test_facility(135)) {
689                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
690                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
691                         }
692                         if (test_facility(148)) {
693                                 set_kvm_facility(kvm->arch.model.fac_mask, 148);
694                                 set_kvm_facility(kvm->arch.model.fac_list, 148);
695                         }
696                         if (test_facility(152)) {
697                                 set_kvm_facility(kvm->arch.model.fac_mask, 152);
698                                 set_kvm_facility(kvm->arch.model.fac_list, 152);
699                         }
700                         r = 0;
701                 } else
702                         r = -EINVAL;
703                 mutex_unlock(&kvm->lock);
704                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
705                          r ? "(not available)" : "(success)");
706                 break;
707         case KVM_CAP_S390_RI:
708                 r = -EINVAL;
709                 mutex_lock(&kvm->lock);
710                 if (kvm->created_vcpus) {
711                         r = -EBUSY;
712                 } else if (test_facility(64)) {
713                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
714                         set_kvm_facility(kvm->arch.model.fac_list, 64);
715                         r = 0;
716                 }
717                 mutex_unlock(&kvm->lock);
718                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
719                          r ? "(not available)" : "(success)");
720                 break;
721         case KVM_CAP_S390_AIS:
722                 mutex_lock(&kvm->lock);
723                 if (kvm->created_vcpus) {
724                         r = -EBUSY;
725                 } else {
726                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
727                         set_kvm_facility(kvm->arch.model.fac_list, 72);
728                         r = 0;
729                 }
730                 mutex_unlock(&kvm->lock);
731                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
732                          r ? "(not available)" : "(success)");
733                 break;
734         case KVM_CAP_S390_GS:
735                 r = -EINVAL;
736                 mutex_lock(&kvm->lock);
737                 if (kvm->created_vcpus) {
738                         r = -EBUSY;
739                 } else if (test_facility(133)) {
740                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
741                         set_kvm_facility(kvm->arch.model.fac_list, 133);
742                         r = 0;
743                 }
744                 mutex_unlock(&kvm->lock);
745                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
746                          r ? "(not available)" : "(success)");
747                 break;
748         case KVM_CAP_S390_HPAGE_1M:
749                 mutex_lock(&kvm->lock);
750                 if (kvm->created_vcpus)
751                         r = -EBUSY;
752                 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
753                         r = -EINVAL;
754                 else {
755                         r = 0;
756                         down_write(&kvm->mm->mmap_sem);
757                         kvm->mm->context.allow_gmap_hpage_1m = 1;
758                         up_write(&kvm->mm->mmap_sem);
759                         /*
760                          * We might have to create fake 4k page
761                          * tables. To prevent the hardware from working on
762                          * stale PGSTEs, we emulate these instructions.
763                          */
764                         kvm->arch.use_skf = 0;
765                         kvm->arch.use_pfmfi = 0;
766                 }
767                 mutex_unlock(&kvm->lock);
768                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
769                          r ? "(not available)" : "(success)");
770                 break;
771         case KVM_CAP_S390_USER_STSI:
772                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
773                 kvm->arch.user_stsi = 1;
774                 r = 0;
775                 break;
776         case KVM_CAP_S390_USER_INSTR0:
777                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
778                 kvm->arch.user_instr0 = 1;
779                 icpt_operexc_on_all_vcpus(kvm);
780                 r = 0;
781                 break;
782         default:
783                 r = -EINVAL;
784                 break;
785         }
786         return r;
787 }
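/*
 * Hypothetical userspace sketch (the "vm_fd" name is illustrative): the
 * VM-wide capabilities handled above are switched on with KVM_ENABLE_CAP
 * before any vcpu is created, e.g.
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
 *		err(1, "KVM_ENABLE_CAP");
 *
 * Capabilities that change the cpu model (vector registers, RI, GS, ...)
 * return -EBUSY once vcpus exist.
 */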
788
789 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
790 {
791         int ret;
792
793         switch (attr->attr) {
794         case KVM_S390_VM_MEM_LIMIT_SIZE:
795                 ret = 0;
796                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
797                          kvm->arch.mem_limit);
798                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
799                         ret = -EFAULT;
800                 break;
801         default:
802                 ret = -ENXIO;
803                 break;
804         }
805         return ret;
806 }
807
808 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
809 {
810         int ret;
811         unsigned int idx;
812         switch (attr->attr) {
813         case KVM_S390_VM_MEM_ENABLE_CMMA:
814                 ret = -ENXIO;
815                 if (!sclp.has_cmma)
816                         break;
817
818                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
819                 mutex_lock(&kvm->lock);
820                 if (kvm->created_vcpus)
821                         ret = -EBUSY;
822                 else if (kvm->mm->context.allow_gmap_hpage_1m)
823                         ret = -EINVAL;
824                 else {
825                         kvm->arch.use_cmma = 1;
826                         /* Not compatible with cmma. */
827                         kvm->arch.use_pfmfi = 0;
828                         ret = 0;
829                 }
830                 mutex_unlock(&kvm->lock);
831                 break;
832         case KVM_S390_VM_MEM_CLR_CMMA:
833                 ret = -ENXIO;
834                 if (!sclp.has_cmma)
835                         break;
836                 ret = -EINVAL;
837                 if (!kvm->arch.use_cmma)
838                         break;
839
840                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
841                 mutex_lock(&kvm->lock);
842                 idx = srcu_read_lock(&kvm->srcu);
843                 s390_reset_cmma(kvm->arch.gmap->mm);
844                 srcu_read_unlock(&kvm->srcu, idx);
845                 mutex_unlock(&kvm->lock);
846                 ret = 0;
847                 break;
848         case KVM_S390_VM_MEM_LIMIT_SIZE: {
849                 unsigned long new_limit;
850
851                 if (kvm_is_ucontrol(kvm))
852                         return -EINVAL;
853
854                 if (get_user(new_limit, (u64 __user *)attr->addr))
855                         return -EFAULT;
856
857                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
858                     new_limit > kvm->arch.mem_limit)
859                         return -E2BIG;
860
861                 if (!new_limit)
862                         return -EINVAL;
863
864                 /* gmap_create takes last usable address */
865                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
866                         new_limit -= 1;
867
868                 ret = -EBUSY;
869                 mutex_lock(&kvm->lock);
870                 if (!kvm->created_vcpus) {
871                         /* gmap_create will round the limit up */
872                         struct gmap *new = gmap_create(current->mm, new_limit);
873
874                         if (!new) {
875                                 ret = -ENOMEM;
876                         } else {
877                                 gmap_remove(kvm->arch.gmap);
878                                 new->private = kvm;
879                                 kvm->arch.gmap = new;
880                                 ret = 0;
881                         }
882                 }
883                 mutex_unlock(&kvm->lock);
884                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
885                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
886                          (void *) kvm->arch.gmap->asce);
887                 break;
888         }
889         default:
890                 ret = -ENXIO;
891                 break;
892         }
893         return ret;
894 }
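/*
 * Hypothetical userspace sketch (the "vm_fd" name is illustrative): the
 * memory attributes above are reached through the VM device-attribute
 * interface, e.g. to enable CMMA:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_ENABLE_CMMA,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr))
 *		err(1, "enable CMMA");
 *
 * KVM_S390_VM_MEM_LIMIT_SIZE additionally passes a u64 through attr.addr.
 */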
895
896 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
897
898 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
899 {
900         struct kvm_vcpu *vcpu;
901         int i;
902
903         kvm_s390_vcpu_block_all(kvm);
904
905         kvm_for_each_vcpu(i, vcpu, kvm) {
906                 kvm_s390_vcpu_crypto_setup(vcpu);
907                 /* recreate the shadow crycb by leaving the VSIE handler */
908                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
909         }
910
911         kvm_s390_vcpu_unblock_all(kvm);
912 }
913
914 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
915 {
916         mutex_lock(&kvm->lock);
917         switch (attr->attr) {
918         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
919                 if (!test_kvm_facility(kvm, 76)) {
920                         mutex_unlock(&kvm->lock);
921                         return -EINVAL;
922                 }
923                 get_random_bytes(
924                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
925                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
926                 kvm->arch.crypto.aes_kw = 1;
927                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
928                 break;
929         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
930                 if (!test_kvm_facility(kvm, 76)) {
931                         mutex_unlock(&kvm->lock);
932                         return -EINVAL;
933                 }
934                 get_random_bytes(
935                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
936                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
937                 kvm->arch.crypto.dea_kw = 1;
938                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
939                 break;
940         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
941                 if (!test_kvm_facility(kvm, 76)) {
942                         mutex_unlock(&kvm->lock);
943                         return -EINVAL;
944                 }
945                 kvm->arch.crypto.aes_kw = 0;
946                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
947                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
948                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
949                 break;
950         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
951                 if (!test_kvm_facility(kvm, 76)) {
952                         mutex_unlock(&kvm->lock);
953                         return -EINVAL;
954                 }
955                 kvm->arch.crypto.dea_kw = 0;
956                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
957                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
958                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
959                 break;
960         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
961                 if (!ap_instructions_available()) {
962                         mutex_unlock(&kvm->lock);
963                         return -EOPNOTSUPP;
964                 }
965                 kvm->arch.crypto.apie = 1;
966                 break;
967         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
968                 if (!ap_instructions_available()) {
969                         mutex_unlock(&kvm->lock);
970                         return -EOPNOTSUPP;
971                 }
972                 kvm->arch.crypto.apie = 0;
973                 break;
974         default:
975                 mutex_unlock(&kvm->lock);
976                 return -ENXIO;
977         }
978
979         kvm_s390_vcpu_crypto_reset_all(kvm);
980         mutex_unlock(&kvm->lock);
981         return 0;
982 }
983
984 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
985 {
986         int cx;
987         struct kvm_vcpu *vcpu;
988
989         kvm_for_each_vcpu(cx, vcpu, kvm)
990                 kvm_s390_sync_request(req, vcpu);
991 }
992
993 /*
994  * Must be called with kvm->srcu held to avoid races on memslots, and with
995  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
996  */
997 static int kvm_s390_vm_start_migration(struct kvm *kvm)
998 {
999         struct kvm_memory_slot *ms;
1000         struct kvm_memslots *slots;
1001         unsigned long ram_pages = 0;
1002         int slotnr;
1003
1004         /* migration mode already enabled */
1005         if (kvm->arch.migration_mode)
1006                 return 0;
1007         slots = kvm_memslots(kvm);
1008         if (!slots || !slots->used_slots)
1009                 return -EINVAL;
1010
1011         if (!kvm->arch.use_cmma) {
1012                 kvm->arch.migration_mode = 1;
1013                 return 0;
1014         }
1015         /* mark all the pages in active slots as dirty */
1016         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1017                 ms = slots->memslots + slotnr;
1018                 if (!ms->dirty_bitmap)
1019                         return -EINVAL;
1020                 /*
1021                  * The second half of the bitmap is only used on x86,
1022                  * and would be wasted otherwise, so we put it to good
1023                  * use here to keep track of the state of the storage
1024                  * attributes.
1025                  */
1026                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1027                 ram_pages += ms->npages;
1028         }
1029         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1030         kvm->arch.migration_mode = 1;
1031         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1032         return 0;
1033 }
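/*
 * Setting every bit in the second bitmap half marks the CMMA state of
 * every page as not yet transferred; cmma_dirty_pages then starts at the
 * full RAM size and is reported back to userspace as the remaining work
 * while it drains the state with KVM_S390_GET_CMMA_BITS.
 */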
1034
1035 /*
1036  * Must be called with kvm->slots_lock to avoid races with ourselves and
1037  * kvm_s390_vm_start_migration.
1038  */
1039 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1040 {
1041         /* migration mode already disabled */
1042         if (!kvm->arch.migration_mode)
1043                 return 0;
1044         kvm->arch.migration_mode = 0;
1045         if (kvm->arch.use_cmma)
1046                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1047         return 0;
1048 }
1049
1050 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1051                                      struct kvm_device_attr *attr)
1052 {
1053         int res = -ENXIO;
1054
1055         mutex_lock(&kvm->slots_lock);
1056         switch (attr->attr) {
1057         case KVM_S390_VM_MIGRATION_START:
1058                 res = kvm_s390_vm_start_migration(kvm);
1059                 break;
1060         case KVM_S390_VM_MIGRATION_STOP:
1061                 res = kvm_s390_vm_stop_migration(kvm);
1062                 break;
1063         default:
1064                 break;
1065         }
1066         mutex_unlock(&kvm->slots_lock);
1067
1068         return res;
1069 }
1070
1071 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1072                                      struct kvm_device_attr *attr)
1073 {
1074         u64 mig = kvm->arch.migration_mode;
1075
1076         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1077                 return -ENXIO;
1078
1079         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1080                 return -EFAULT;
1081         return 0;
1082 }
1083
1084 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1085 {
1086         struct kvm_s390_vm_tod_clock gtod;
1087
1088         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1089                 return -EFAULT;
1090
1091         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1092                 return -EINVAL;
1093         kvm_s390_set_tod_clock(kvm, &gtod);
1094
1095         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1096                 gtod.epoch_idx, gtod.tod);
1097
1098         return 0;
1099 }
1100
1101 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1102 {
1103         u8 gtod_high;
1104
1105         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1106                                            sizeof(gtod_high)))
1107                 return -EFAULT;
1108
1109         if (gtod_high != 0)
1110                 return -EINVAL;
1111         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1112
1113         return 0;
1114 }
1115
1116 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1117 {
1118         struct kvm_s390_vm_tod_clock gtod = { 0 };
1119
1120         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1121                            sizeof(gtod.tod)))
1122                 return -EFAULT;
1123
1124         kvm_s390_set_tod_clock(kvm, &gtod);
1125         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1126         return 0;
1127 }
1128
1129 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1130 {
1131         int ret;
1132
1133         if (attr->flags)
1134                 return -EINVAL;
1135
1136         switch (attr->attr) {
1137         case KVM_S390_VM_TOD_EXT:
1138                 ret = kvm_s390_set_tod_ext(kvm, attr);
1139                 break;
1140         case KVM_S390_VM_TOD_HIGH:
1141                 ret = kvm_s390_set_tod_high(kvm, attr);
1142                 break;
1143         case KVM_S390_VM_TOD_LOW:
1144                 ret = kvm_s390_set_tod_low(kvm, attr);
1145                 break;
1146         default:
1147                 ret = -ENXIO;
1148                 break;
1149         }
1150         return ret;
1151 }
1152
1153 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1154                                    struct kvm_s390_vm_tod_clock *gtod)
1155 {
1156         struct kvm_s390_tod_clock_ext htod;
1157
1158         preempt_disable();
1159
1160         get_tod_clock_ext((char *)&htod);
1161
1162         gtod->tod = htod.tod + kvm->arch.epoch;
1163         gtod->epoch_idx = 0;
1164         if (test_kvm_facility(kvm, 139)) {
1165                 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1166                 if (gtod->tod < htod.tod)
1167                         gtod->epoch_idx += 1;
1168         }
1169
1170         preempt_enable();
1171 }
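/*
 * The guest TOD is the host TOD plus the guest epoch, computed in
 * 128-bit fashion: if the 64-bit addition wraps (gtod->tod < htod.tod),
 * the carry is folded into the epoch index. For example, htod.tod =
 * 0xffffffffffffff00 with epoch = 0x200 yields gtod->tod = 0x100 and an
 * epoch_idx one higher than htod.epoch_idx + epdx.
 */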
1172
1173 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1174 {
1175         struct kvm_s390_vm_tod_clock gtod;
1176
1177         memset(&gtod, 0, sizeof(gtod));
1178         kvm_s390_get_tod_clock(kvm, &gtod);
1179         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1180                 return -EFAULT;
1181
1182         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1183                 gtod.epoch_idx, gtod.tod);
1184         return 0;
1185 }
1186
1187 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1188 {
1189         u8 gtod_high = 0;
1190
1191         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1192                                          sizeof(gtod_high)))
1193                 return -EFAULT;
1194         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1195
1196         return 0;
1197 }
1198
1199 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1200 {
1201         u64 gtod;
1202
1203         gtod = kvm_s390_get_tod_clock_fast(kvm);
1204         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1205                 return -EFAULT;
1206         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1207
1208         return 0;
1209 }
1210
1211 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1212 {
1213         int ret;
1214
1215         if (attr->flags)
1216                 return -EINVAL;
1217
1218         switch (attr->attr) {
1219         case KVM_S390_VM_TOD_EXT:
1220                 ret = kvm_s390_get_tod_ext(kvm, attr);
1221                 break;
1222         case KVM_S390_VM_TOD_HIGH:
1223                 ret = kvm_s390_get_tod_high(kvm, attr);
1224                 break;
1225         case KVM_S390_VM_TOD_LOW:
1226                 ret = kvm_s390_get_tod_low(kvm, attr);
1227                 break;
1228         default:
1229                 ret = -ENXIO;
1230                 break;
1231         }
1232         return ret;
1233 }
1234
1235 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1236 {
1237         struct kvm_s390_vm_cpu_processor *proc;
1238         u16 lowest_ibc, unblocked_ibc;
1239         int ret = 0;
1240
1241         mutex_lock(&kvm->lock);
1242         if (kvm->created_vcpus) {
1243                 ret = -EBUSY;
1244                 goto out;
1245         }
1246         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1247         if (!proc) {
1248                 ret = -ENOMEM;
1249                 goto out;
1250         }
1251         if (!copy_from_user(proc, (void __user *)attr->addr,
1252                             sizeof(*proc))) {
1253                 kvm->arch.model.cpuid = proc->cpuid;
1254                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1255                 unblocked_ibc = sclp.ibc & 0xfff;
1256                 if (lowest_ibc && proc->ibc) {
1257                         if (proc->ibc > unblocked_ibc)
1258                                 kvm->arch.model.ibc = unblocked_ibc;
1259                         else if (proc->ibc < lowest_ibc)
1260                                 kvm->arch.model.ibc = lowest_ibc;
1261                         else
1262                                 kvm->arch.model.ibc = proc->ibc;
1263                 }
1264                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1265                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1266                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1267                          kvm->arch.model.ibc,
1268                          kvm->arch.model.cpuid);
1269                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1270                          kvm->arch.model.fac_list[0],
1271                          kvm->arch.model.fac_list[1],
1272                          kvm->arch.model.fac_list[2]);
1273         } else
1274                 ret = -EFAULT;
1275         kfree(proc);
1276 out:
1277         mutex_unlock(&kvm->lock);
1278         return ret;
1279 }
1280
1281 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1282                                        struct kvm_device_attr *attr)
1283 {
1284         struct kvm_s390_vm_cpu_feat data;
1285
1286         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1287                 return -EFAULT;
1288         if (!bitmap_subset((unsigned long *) data.feat,
1289                            kvm_s390_available_cpu_feat,
1290                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1291                 return -EINVAL;
1292
1293         mutex_lock(&kvm->lock);
1294         if (kvm->created_vcpus) {
1295                 mutex_unlock(&kvm->lock);
1296                 return -EBUSY;
1297         }
1298         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1299                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1300         mutex_unlock(&kvm->lock);
1301         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1302                          data.feat[0],
1303                          data.feat[1],
1304                          data.feat[2]);
1305         return 0;
1306 }
1307
1308 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1309                                           struct kvm_device_attr *attr)
1310 {
1311         mutex_lock(&kvm->lock);
1312         if (kvm->created_vcpus) {
1313                 mutex_unlock(&kvm->lock);
1314                 return -EBUSY;
1315         }
1316
1317         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1318                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1319                 mutex_unlock(&kvm->lock);
1320                 return -EFAULT;
1321         }
1322         mutex_unlock(&kvm->lock);
1323
1324         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1325                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1326                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1327                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1328                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1329         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1330                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1331                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1332         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1333                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1334                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1335         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1336                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1337                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1338         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1339                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1340                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1341         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1342                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1343                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1344         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1345                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1346                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1347         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1348                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1349                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1350         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1351                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1352                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1353         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1354                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1355                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1356         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1357                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1358                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1359         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1360                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1361                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1362         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1363                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1364                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1365         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1366                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1367                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1368         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1369                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1370                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1371         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1372                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1373                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1374                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1375                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1376         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1377                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1378                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1379                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1380                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1381
1382         return 0;
1383 }
1384
1385 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1386 {
1387         int ret = -ENXIO;
1388
1389         switch (attr->attr) {
1390         case KVM_S390_VM_CPU_PROCESSOR:
1391                 ret = kvm_s390_set_processor(kvm, attr);
1392                 break;
1393         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1394                 ret = kvm_s390_set_processor_feat(kvm, attr);
1395                 break;
1396         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1397                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1398                 break;
1399         }
1400         return ret;
1401 }
1402
1403 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1404 {
1405         struct kvm_s390_vm_cpu_processor *proc;
1406         int ret = 0;
1407
1408         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1409         if (!proc) {
1410                 ret = -ENOMEM;
1411                 goto out;
1412         }
1413         proc->cpuid = kvm->arch.model.cpuid;
1414         proc->ibc = kvm->arch.model.ibc;
1415         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1416                S390_ARCH_FAC_LIST_SIZE_BYTE);
1417         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1418                  kvm->arch.model.ibc,
1419                  kvm->arch.model.cpuid);
1420         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1421                  kvm->arch.model.fac_list[0],
1422                  kvm->arch.model.fac_list[1],
1423                  kvm->arch.model.fac_list[2]);
1424         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1425                 ret = -EFAULT;
1426         kfree(proc);
1427 out:
1428         return ret;
1429 }
1430
1431 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1432 {
1433         struct kvm_s390_vm_cpu_machine *mach;
1434         int ret = 0;
1435
1436         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1437         if (!mach) {
1438                 ret = -ENOMEM;
1439                 goto out;
1440         }
1441         get_cpu_id((struct cpuid *) &mach->cpuid);
1442         mach->ibc = sclp.ibc;
1443         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1444                S390_ARCH_FAC_LIST_SIZE_BYTE);
1445         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1446                sizeof(S390_lowcore.stfle_fac_list));
1447         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1448                  kvm->arch.model.ibc,
1449                  kvm->arch.model.cpuid);
1450         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1451                  mach->fac_mask[0],
1452                  mach->fac_mask[1],
1453                  mach->fac_mask[2]);
1454         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1455                  mach->fac_list[0],
1456                  mach->fac_list[1],
1457                  mach->fac_list[2]);
1458         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1459                 ret = -EFAULT;
1460         kfree(mach);
1461 out:
1462         return ret;
1463 }
1464
1465 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1466                                        struct kvm_device_attr *attr)
1467 {
1468         struct kvm_s390_vm_cpu_feat data;
1469
1470         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1471                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1472         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1473                 return -EFAULT;
1474         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1475                          data.feat[0],
1476                          data.feat[1],
1477                          data.feat[2]);
1478         return 0;
1479 }
1480
1481 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1482                                      struct kvm_device_attr *attr)
1483 {
1484         struct kvm_s390_vm_cpu_feat data;
1485
1486         bitmap_copy((unsigned long *) data.feat,
1487                     kvm_s390_available_cpu_feat,
1488                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1489         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1490                 return -EFAULT;
1491         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1492                          data.feat[0],
1493                          data.feat[1],
1494                          data.feat[2]);
1495         return 0;
1496 }
1497
1498 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1499                                           struct kvm_device_attr *attr)
1500 {
1501         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1502             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1503                 return -EFAULT;
1504
1505         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1506                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1507                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1508                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1509                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1510         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1511                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1512                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1513         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1514                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1515                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1516         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1517                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1518                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1519         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1520                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1521                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1522         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1523                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1524                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1525         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1526                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1527                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1528         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1529                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1530                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1531         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1532                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1533                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1534         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1535                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1536                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1537         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1538                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1539                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1540         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1541                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1542                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1543         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1544                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1545                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1546         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1547                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1548                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1549         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1550                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1551                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1552         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1553                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1554                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1555                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1556                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1557         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1558                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1559                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1560                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1561                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1562
1563         return 0;
1564 }
1565
1566 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1567                                         struct kvm_device_attr *attr)
1568 {
1569         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1570             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1571                 return -EFAULT;
1572
1573         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1574                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1575                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1576                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1577                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1578         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1579                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1580                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1581         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1582                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1583                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1584         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1585                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1586                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1587         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1588                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1589                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1590         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1591                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1592                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1593         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1594                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1595                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1596         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1597                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1598                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1599         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1600                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1601                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1602         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1603                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1604                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1605         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1606                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1607                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1608         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1609                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1610                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1611         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1612                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1613                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1614         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1615                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1616                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1617         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1618                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1619                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1620         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1621                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1622                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1623                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1624                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1625         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1626                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1627                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1628                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1629                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1630
1631         return 0;
1632 }
1633
1634 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1635 {
1636         int ret = -ENXIO;
1637
1638         switch (attr->attr) {
1639         case KVM_S390_VM_CPU_PROCESSOR:
1640                 ret = kvm_s390_get_processor(kvm, attr);
1641                 break;
1642         case KVM_S390_VM_CPU_MACHINE:
1643                 ret = kvm_s390_get_machine(kvm, attr);
1644                 break;
1645         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1646                 ret = kvm_s390_get_processor_feat(kvm, attr);
1647                 break;
1648         case KVM_S390_VM_CPU_MACHINE_FEAT:
1649                 ret = kvm_s390_get_machine_feat(kvm, attr);
1650                 break;
1651         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1652                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1653                 break;
1654         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1655                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1656                 break;
1657         }
1658         return ret;
1659 }
1660
1661 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1662 {
1663         int ret;
1664
1665         switch (attr->group) {
1666         case KVM_S390_VM_MEM_CTRL:
1667                 ret = kvm_s390_set_mem_control(kvm, attr);
1668                 break;
1669         case KVM_S390_VM_TOD:
1670                 ret = kvm_s390_set_tod(kvm, attr);
1671                 break;
1672         case KVM_S390_VM_CPU_MODEL:
1673                 ret = kvm_s390_set_cpu_model(kvm, attr);
1674                 break;
1675         case KVM_S390_VM_CRYPTO:
1676                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1677                 break;
1678         case KVM_S390_VM_MIGRATION:
1679                 ret = kvm_s390_vm_set_migration(kvm, attr);
1680                 break;
1681         default:
1682                 ret = -ENXIO;
1683                 break;
1684         }
1685
1686         return ret;
1687 }
1688
1689 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1690 {
1691         int ret;
1692
1693         switch (attr->group) {
1694         case KVM_S390_VM_MEM_CTRL:
1695                 ret = kvm_s390_get_mem_control(kvm, attr);
1696                 break;
1697         case KVM_S390_VM_TOD:
1698                 ret = kvm_s390_get_tod(kvm, attr);
1699                 break;
1700         case KVM_S390_VM_CPU_MODEL:
1701                 ret = kvm_s390_get_cpu_model(kvm, attr);
1702                 break;
1703         case KVM_S390_VM_MIGRATION:
1704                 ret = kvm_s390_vm_get_migration(kvm, attr);
1705                 break;
1706         default:
1707                 ret = -ENXIO;
1708                 break;
1709         }
1710
1711         return ret;
1712 }
1713
1714 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1715 {
1716         int ret;
1717
1718         switch (attr->group) {
1719         case KVM_S390_VM_MEM_CTRL:
1720                 switch (attr->attr) {
1721                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1722                 case KVM_S390_VM_MEM_CLR_CMMA:
1723                         ret = sclp.has_cmma ? 0 : -ENXIO;
1724                         break;
1725                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1726                         ret = 0;
1727                         break;
1728                 default:
1729                         ret = -ENXIO;
1730                         break;
1731                 }
1732                 break;
1733         case KVM_S390_VM_TOD:
1734                 switch (attr->attr) {
1735                 case KVM_S390_VM_TOD_LOW:
1736                 case KVM_S390_VM_TOD_HIGH:
1737                         ret = 0;
1738                         break;
1739                 default:
1740                         ret = -ENXIO;
1741                         break;
1742                 }
1743                 break;
1744         case KVM_S390_VM_CPU_MODEL:
1745                 switch (attr->attr) {
1746                 case KVM_S390_VM_CPU_PROCESSOR:
1747                 case KVM_S390_VM_CPU_MACHINE:
1748                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1749                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1750                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1751                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1752                         ret = 0;
1753                         break;
1754                 default:
1755                         ret = -ENXIO;
1756                         break;
1757                 }
1758                 break;
1759         case KVM_S390_VM_CRYPTO:
1760                 switch (attr->attr) {
1761                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1762                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1763                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1764                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1765                         ret = 0;
1766                         break;
1767                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1768                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1769                         ret = ap_instructions_available() ? 0 : -ENXIO;
1770                         break;
1771                 default:
1772                         ret = -ENXIO;
1773                         break;
1774                 }
1775                 break;
1776         case KVM_S390_VM_MIGRATION:
1777                 ret = 0;
1778                 break;
1779         default:
1780                 ret = -ENXIO;
1781                 break;
1782         }
1783
1784         return ret;
1785 }
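
/*
 * A minimal userspace sketch (illustration only, not part of this file) of the
 * device attribute interface dispatched above: probe an attribute with
 * KVM_HAS_DEVICE_ATTR, then fetch the machine model via KVM_GET_DEVICE_ATTR.
 * It assumes an s390 host, a VM fd already obtained from KVM_CREATE_VM and the
 * UAPI definitions from <linux/kvm.h>; error handling is omitted.
 */
#if 0	/* hypothetical userspace example, not compiled with the kernel */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int query_machine_model(int vm_fd)
{
	struct kvm_s390_vm_cpu_machine mach;
	struct kvm_device_attr attr = {
		.group = KVM_S390_VM_CPU_MODEL,
		.attr  = KVM_S390_VM_CPU_MACHINE,
		.addr  = (__u64)(unsigned long)&mach,
	};

	/* answered by kvm_s390_vm_has_attr() above */
	if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr))
		return -1;
	/* filled in by kvm_s390_get_machine() above */
	if (ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr))
		return -1;
	printf("host ibc 0x%x, cpuid 0x%llx\n", mach.ibc,
	       (unsigned long long)mach.cpuid);
	return 0;
}
#endif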
1786
1787 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1788 {
1789         uint8_t *keys;
1790         uint64_t hva;
1791         int srcu_idx, i, r = 0;
1792
1793         if (args->flags != 0)
1794                 return -EINVAL;
1795
1796         /* Is this guest using storage keys? */
1797         if (!mm_uses_skeys(current->mm))
1798                 return KVM_S390_GET_SKEYS_NONE;
1799
1800         /* Enforce sane limit on memory allocation */
1801         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1802                 return -EINVAL;
1803
1804         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1805         if (!keys)
1806                 return -ENOMEM;
1807
1808         down_read(&current->mm->mmap_sem);
1809         srcu_idx = srcu_read_lock(&kvm->srcu);
1810         for (i = 0; i < args->count; i++) {
1811                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1812                 if (kvm_is_error_hva(hva)) {
1813                         r = -EFAULT;
1814                         break;
1815                 }
1816
1817                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1818                 if (r)
1819                         break;
1820         }
1821         srcu_read_unlock(&kvm->srcu, srcu_idx);
1822         up_read(&current->mm->mmap_sem);
1823
1824         if (!r) {
1825                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1826                                  sizeof(uint8_t) * args->count);
1827                 if (r)
1828                         r = -EFAULT;
1829         }
1830
1831         kvfree(keys);
1832         return r;
1833 }
1834
1835 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1836 {
1837         uint8_t *keys;
1838         uint64_t hva;
1839         int srcu_idx, i, r = 0;
1840         bool unlocked;
1841
1842         if (args->flags != 0)
1843                 return -EINVAL;
1844
1845         /* Enforce sane limit on memory allocation */
1846         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1847                 return -EINVAL;
1848
1849         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1850         if (!keys)
1851                 return -ENOMEM;
1852
1853         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1854                            sizeof(uint8_t) * args->count);
1855         if (r) {
1856                 r = -EFAULT;
1857                 goto out;
1858         }
1859
1860         /* Enable storage key handling for the guest */
1861         r = s390_enable_skey();
1862         if (r)
1863                 goto out;
1864
1865         i = 0;
1866         down_read(&current->mm->mmap_sem);
1867         srcu_idx = srcu_read_lock(&kvm->srcu);
1868         while (i < args->count) {
1869                 unlocked = false;
1870                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1871                 if (kvm_is_error_hva(hva)) {
1872                         r = -EFAULT;
1873                         break;
1874                 }
1875
1876                 /* Lowest order bit is reserved */
1877                 if (keys[i] & 0x01) {
1878                         r = -EINVAL;
1879                         break;
1880                 }
1881
1882                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1883                 if (r) {
1884                         r = fixup_user_fault(current, current->mm, hva,
1885                                              FAULT_FLAG_WRITE, &unlocked);
1886                         if (r)
1887                                 break;
1888                 }
1889                 if (!r)
1890                         i++;
1891         }
1892         srcu_read_unlock(&kvm->srcu, srcu_idx);
1893         up_read(&current->mm->mmap_sem);
1894 out:
1895         kvfree(keys);
1896         return r;
1897 }
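
/*
 * A minimal userspace sketch (illustration only) for the storage key ioctls
 * implemented above: read the keys of "count" guest frames starting at frame 0
 * with KVM_S390_GET_SKEYS. It assumes a VM fd from KVM_CREATE_VM and the UAPI
 * definitions from <linux/kvm.h>; a positive return of KVM_S390_GET_SKEYS_NONE
 * means the guest does not use storage keys at all. Error handling is omitted.
 */
#if 0	/* hypothetical userspace example, not compiled with the kernel */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static long read_guest_skeys(int vm_fd, uint8_t *keys, uint64_t count)
{
	struct kvm_s390_skeys args = {
		.start_gfn     = 0,		/* first guest frame */
		.count         = count,		/* at most KVM_S390_SKEYS_MAX */
		.skeydata_addr = (__u64)(unsigned long)keys,
	};

	return ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
}
#endif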
1898
1899 /*
1900  * Base address and length must be sent at the start of each block; it is
1901  * therefore cheaper to send some clean data, as long as it is less than the
1902  * size of two longs.
1903  */
1904 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1905 /* for consistency */
1906 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
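
/*
 * Worked example of the threshold above (an illustration, assuming the usual
 * 64-bit s390 sizes): 2 * sizeof(void *) is 16, so a gap of up to 16 clean
 * one-byte values is still packed into the current block, which costs no more
 * than announcing a new block with a fresh base address and length (two
 * long-sized values).
 */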
1907
1908 /*
1909  * Similar to gfn_to_memslot, but returns a memslot index even when the
1910  * address falls in a hole. In that case, the index of one of the memslots
1911  * bordering the hole is returned.
1912  */
1913 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1914 {
1915         int start = 0, end = slots->used_slots;
1916         int slot = atomic_read(&slots->lru_slot);
1917         struct kvm_memory_slot *memslots = slots->memslots;
1918
1919         if (gfn >= memslots[slot].base_gfn &&
1920             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1921                 return slot;
1922
1923         while (start < end) {
1924                 slot = start + (end - start) / 2;
1925
1926                 if (gfn >= memslots[slot].base_gfn)
1927                         end = slot;
1928                 else
1929                         start = slot + 1;
1930         }
1931
1932         if (gfn >= memslots[start].base_gfn &&
1933             gfn < memslots[start].base_gfn + memslots[start].npages) {
1934                 atomic_set(&slots->lru_slot, start);
1935         }
1936
1937         return start;
1938 }
1939
1940 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1941                               u8 *res, unsigned long bufsize)
1942 {
1943         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1944
1945         args->count = 0;
1946         while (args->count < bufsize) {
1947                 hva = gfn_to_hva(kvm, cur_gfn);
1948                 /*
1949                  * We return an error if the first value was invalid, but we
1950                  * return successfully if at least one value was copied.
1951                  */
1952                 if (kvm_is_error_hva(hva))
1953                         return args->count ? 0 : -EFAULT;
1954                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1955                         pgstev = 0;
1956                 res[args->count++] = (pgstev >> 24) & 0x43;
1957                 cur_gfn++;
1958         }
1959
1960         return 0;
1961 }
1962
1963 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1964                                               unsigned long cur_gfn)
1965 {
1966         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1967         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1968         unsigned long ofs = cur_gfn - ms->base_gfn;
1969
1970         if (ms->base_gfn + ms->npages <= cur_gfn) {
1971                 slotidx--;
1972                 /* If we are above the highest slot, wrap around */
1973                 if (slotidx < 0)
1974                         slotidx = slots->used_slots - 1;
1975
1976                 ms = slots->memslots + slotidx;
1977                 ofs = 0;
1978         }
1979         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1980         while ((slotidx > 0) && (ofs >= ms->npages)) {
1981                 slotidx--;
1982                 ms = slots->memslots + slotidx;
1983                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1984         }
1985         return ms->base_gfn + ofs;
1986 }
1987
1988 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1989                              u8 *res, unsigned long bufsize)
1990 {
1991         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1992         struct kvm_memslots *slots = kvm_memslots(kvm);
1993         struct kvm_memory_slot *ms;
1994
1995         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1996         ms = gfn_to_memslot(kvm, cur_gfn);
1997         args->count = 0;
1998         args->start_gfn = cur_gfn;
1999         if (!ms)
2000                 return 0;
2001         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2002         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2003
2004         while (args->count < bufsize) {
2005                 hva = gfn_to_hva(kvm, cur_gfn);
2006                 if (kvm_is_error_hva(hva))
2007                         return 0;
2008                 /* Decrement only if we actually flipped the bit to 0 */
2009                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2010                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2011                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2012                         pgstev = 0;
2013                 /* Save the value */
2014                 res[args->count++] = (pgstev >> 24) & 0x43;
2015                 /* If the next bit is too far away, stop. */
2016                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2017                         return 0;
2018                 /* If we reached the previous "next", find the next one */
2019                 if (cur_gfn == next_gfn)
2020                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2021                 /* Reached the end of memory or of the buffer, stop */
2022                 if ((next_gfn >= mem_end) ||
2023                     (next_gfn - args->start_gfn >= bufsize))
2024                         return 0;
2025                 cur_gfn++;
2026                 /* Reached the end of the current memslot, take the next one. */
2027                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2028                         ms = gfn_to_memslot(kvm, cur_gfn);
2029                         if (!ms)
2030                                 return 0;
2031                 }
2032         }
2033         return 0;
2034 }
2035
2036 /*
2037  * This function searches for the next page with dirty CMMA attributes, and
2038  * saves the attributes in the buffer, stopping either at the end of the
2039  * buffer or when a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is
2040  * found; no trailing clean bytes are saved.
2041  * If no dirty bits were found, or if CMMA was not enabled or used, the
2042  * output buffer will report a length of 0.
2043  */
2044 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2045                                   struct kvm_s390_cmma_log *args)
2046 {
2047         unsigned long bufsize;
2048         int srcu_idx, peek, ret;
2049         u8 *values;
2050
2051         if (!kvm->arch.use_cmma)
2052                 return -ENXIO;
2053         /* Invalid/unsupported flags were specified */
2054         if (args->flags & ~KVM_S390_CMMA_PEEK)
2055                 return -EINVAL;
2056         /* Migration mode query, and we are not doing a migration */
2057         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2058         if (!peek && !kvm->arch.migration_mode)
2059                 return -EINVAL;
2060         /* CMMA is disabled or was not used, or the buffer has length zero */
2061         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2062         if (!bufsize || !kvm->mm->context.uses_cmm) {
2063                 memset(args, 0, sizeof(*args));
2064                 return 0;
2065         }
2066         /* We are not peeking, and there are no dirty pages */
2067         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2068                 memset(args, 0, sizeof(*args));
2069                 return 0;
2070         }
2071
2072         values = vmalloc(bufsize);
2073         if (!values)
2074                 return -ENOMEM;
2075
2076         down_read(&kvm->mm->mmap_sem);
2077         srcu_idx = srcu_read_lock(&kvm->srcu);
2078         if (peek)
2079                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2080         else
2081                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2082         srcu_read_unlock(&kvm->srcu, srcu_idx);
2083         up_read(&kvm->mm->mmap_sem);
2084
2085         if (kvm->arch.migration_mode)
2086                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2087         else
2088                 args->remaining = 0;
2089
2090         if (copy_to_user((void __user *)args->values, values, args->count))
2091                 ret = -EFAULT;
2092
2093         vfree(values);
2094         return ret;
2095 }
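
/*
 * A minimal userspace sketch (illustration only) for the CMMA log interface
 * implemented above: peek at the CMMA values of "count" pages starting at
 * guest frame "start_gfn" with KVM_S390_GET_CMMA_BITS. It assumes a VM fd from
 * KVM_CREATE_VM and the UAPI definitions from <linux/kvm.h>; on success,
 * args.count holds the number of values actually stored in "buf". Error
 * handling is omitted.
 */
#if 0	/* hypothetical userspace example, not compiled with the kernel */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int peek_cmma(int vm_fd, uint64_t start_gfn, uint8_t *buf, uint32_t count)
{
	struct kvm_s390_cmma_log args = {
		.start_gfn = start_gfn,
		.count     = count,	/* capped by the kernel to its maximum */
		.flags     = KVM_S390_CMMA_PEEK,
		.values    = (__u64)(unsigned long)buf,
	};

	if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &args))
		return -1;
	return args.count;	/* values written by kvm_s390_peek_cmma() */
}
#endif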
2096
2097 /*
2098  * This function sets the CMMA attributes for the given pages. If the input
2099  * buffer has zero length, no action is taken; otherwise the attributes are
2100  * set and the mm->context.uses_cmm flag is set.
2101  */
2102 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2103                                   const struct kvm_s390_cmma_log *args)
2104 {
2105         unsigned long hva, mask, pgstev, i;
2106         uint8_t *bits;
2107         int srcu_idx, r = 0;
2108
2109         mask = args->mask;
2110
2111         if (!kvm->arch.use_cmma)
2112                 return -ENXIO;
2113         /* invalid/unsupported flags */
2114         if (args->flags != 0)
2115                 return -EINVAL;
2116         /* Enforce sane limit on memory allocation */
2117         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2118                 return -EINVAL;
2119         /* Nothing to do */
2120         if (args->count == 0)
2121                 return 0;
2122
2123         bits = vmalloc(array_size(sizeof(*bits), args->count));
2124         if (!bits)
2125                 return -ENOMEM;
2126
2127         r = copy_from_user(bits, (void __user *)args->values, args->count);
2128         if (r) {
2129                 r = -EFAULT;
2130                 goto out;
2131         }
2132
2133         down_read(&kvm->mm->mmap_sem);
2134         srcu_idx = srcu_read_lock(&kvm->srcu);
2135         for (i = 0; i < args->count; i++) {
2136                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2137                 if (kvm_is_error_hva(hva)) {
2138                         r = -EFAULT;
2139                         break;
2140                 }
2141
2142                 pgstev = bits[i];
2143                 pgstev = pgstev << 24;
2144                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2145                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2146         }
2147         srcu_read_unlock(&kvm->srcu, srcu_idx);
2148         up_read(&kvm->mm->mmap_sem);
2149
2150         if (!kvm->mm->context.uses_cmm) {
2151                 down_write(&kvm->mm->mmap_sem);
2152                 kvm->mm->context.uses_cmm = 1;
2153                 up_write(&kvm->mm->mmap_sem);
2154         }
2155 out:
2156         vfree(bits);
2157         return r;
2158 }
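
/*
 * The matching restore-side sketch (illustration only, same assumptions as the
 * peek example above): hand a buffer of CMMA values back to the kernel with
 * KVM_S390_SET_CMMA_BITS. The mask selects which PGSTE bits may be updated;
 * ~0ULL is used here for simplicity and is narrowed by the kernel to the
 * maskable bits. Error handling is omitted.
 */
#if 0	/* hypothetical userspace example, not compiled with the kernel */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int restore_cmma(int vm_fd, uint64_t start_gfn,
			const uint8_t *buf, uint32_t count)
{
	struct kvm_s390_cmma_log args = {
		.start_gfn = start_gfn,
		.count     = count,
		.flags     = 0,		/* no flags are defined for the SET path */
		.mask      = ~0ULL,	/* kernel keeps only the maskable bits */
		.values    = (__u64)(unsigned long)buf,
	};

	return ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &args);
}
#endif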
2159
2160 long kvm_arch_vm_ioctl(struct file *filp,
2161                        unsigned int ioctl, unsigned long arg)
2162 {
2163         struct kvm *kvm = filp->private_data;
2164         void __user *argp = (void __user *)arg;
2165         struct kvm_device_attr attr;
2166         int r;
2167
2168         switch (ioctl) {
2169         case KVM_S390_INTERRUPT: {
2170                 struct kvm_s390_interrupt s390int;
2171
2172                 r = -EFAULT;
2173                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2174                         break;
2175                 r = kvm_s390_inject_vm(kvm, &s390int);
2176                 break;
2177         }
2178         case KVM_CREATE_IRQCHIP: {
2179                 struct kvm_irq_routing_entry routing;
2180
2181                 r = -EINVAL;
2182                 if (kvm->arch.use_irqchip) {
2183                         /* Set up dummy routing. */
2184                         memset(&routing, 0, sizeof(routing));
2185                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2186                 }
2187                 break;
2188         }
2189         case KVM_SET_DEVICE_ATTR: {
2190                 r = -EFAULT;
2191                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2192                         break;
2193                 r = kvm_s390_vm_set_attr(kvm, &attr);
2194                 break;
2195         }
2196         case KVM_GET_DEVICE_ATTR: {
2197                 r = -EFAULT;
2198                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2199                         break;
2200                 r = kvm_s390_vm_get_attr(kvm, &attr);
2201                 break;
2202         }
2203         case KVM_HAS_DEVICE_ATTR: {
2204                 r = -EFAULT;
2205                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2206                         break;
2207                 r = kvm_s390_vm_has_attr(kvm, &attr);
2208                 break;
2209         }
2210         case KVM_S390_GET_SKEYS: {
2211                 struct kvm_s390_skeys args;
2212
2213                 r = -EFAULT;
2214                 if (copy_from_user(&args, argp,
2215                                    sizeof(struct kvm_s390_skeys)))
2216                         break;
2217                 r = kvm_s390_get_skeys(kvm, &args);
2218                 break;
2219         }
2220         case KVM_S390_SET_SKEYS: {
2221                 struct kvm_s390_skeys args;
2222
2223                 r = -EFAULT;
2224                 if (copy_from_user(&args, argp,
2225                                    sizeof(struct kvm_s390_skeys)))
2226                         break;
2227                 r = kvm_s390_set_skeys(kvm, &args);
2228                 break;
2229         }
2230         case KVM_S390_GET_CMMA_BITS: {
2231                 struct kvm_s390_cmma_log args;
2232
2233                 r = -EFAULT;
2234                 if (copy_from_user(&args, argp, sizeof(args)))
2235                         break;
2236                 mutex_lock(&kvm->slots_lock);
2237                 r = kvm_s390_get_cmma_bits(kvm, &args);
2238                 mutex_unlock(&kvm->slots_lock);
2239                 if (!r) {
2240                         r = copy_to_user(argp, &args, sizeof(args));
2241                         if (r)
2242                                 r = -EFAULT;
2243                 }
2244                 break;
2245         }
2246         case KVM_S390_SET_CMMA_BITS: {
2247                 struct kvm_s390_cmma_log args;
2248
2249                 r = -EFAULT;
2250                 if (copy_from_user(&args, argp, sizeof(args)))
2251                         break;
2252                 mutex_lock(&kvm->slots_lock);
2253                 r = kvm_s390_set_cmma_bits(kvm, &args);
2254                 mutex_unlock(&kvm->slots_lock);
2255                 break;
2256         }
2257         default:
2258                 r = -ENOTTY;
2259         }
2260
2261         return r;
2262 }
2263
2264 static int kvm_s390_apxa_installed(void)
2265 {
2266         struct ap_config_info info;
2267
2268         if (ap_instructions_available()) {
2269                 if (ap_qci(&info) == 0)
2270                         return info.apxa;
2271         }
2272
2273         return 0;
2274 }
2275
2276 /*
2277  * The format of the crypto control block (CRYCB) is specified in the 3 low
2278  * order bits of the CRYCB designation (CRYCBD) field as follows:
2279  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2280  *           AP extended addressing (APXA) facility is installed.
2281  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2282  * Format 2: Both the APXA and MSAX3 facilities are installed.
2283  */
2284 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2285 {
2286         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2287
2288         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2289         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2290
2291         /* Check whether MSAX3 is installed */
2292         if (!test_kvm_facility(kvm, 76))
2293                 return;
2294
2295         if (kvm_s390_apxa_installed())
2296                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2297         else
2298                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2299 }
2300
2301 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2302                                unsigned long *aqm, unsigned long *adm)
2303 {
2304         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2305
2306         mutex_lock(&kvm->lock);
2307         kvm_s390_vcpu_block_all(kvm);
2308
2309         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2310         case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2311                 memcpy(crycb->apcb1.apm, apm, 32);
2312                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2313                          apm[0], apm[1], apm[2], apm[3]);
2314                 memcpy(crycb->apcb1.aqm, aqm, 32);
2315                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2316                          aqm[0], aqm[1], aqm[2], aqm[3]);
2317                 memcpy(crycb->apcb1.adm, adm, 32);
2318                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2319                          adm[0], adm[1], adm[2], adm[3]);
2320                 break;
2321         case CRYCB_FORMAT1:
2322         case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2323                 memcpy(crycb->apcb0.apm, apm, 8);
2324                 memcpy(crycb->apcb0.aqm, aqm, 2);
2325                 memcpy(crycb->apcb0.adm, adm, 2);
2326                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2327                          apm[0], *((unsigned short *)aqm),
2328                          *((unsigned short *)adm));
2329                 break;
2330         default:        /* Cannot happen */
2331                 break;
2332         }
2333
2334         /* recreate the shadow crycb for each vcpu */
2335         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2336         kvm_s390_vcpu_unblock_all(kvm);
2337         mutex_unlock(&kvm->lock);
2338 }
2339 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2340
2341 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2342 {
2343         mutex_lock(&kvm->lock);
2344         kvm_s390_vcpu_block_all(kvm);
2345
2346         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2347                sizeof(kvm->arch.crypto.crycb->apcb0));
2348         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2349                sizeof(kvm->arch.crypto.crycb->apcb1));
2350
2351         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2352         /* recreate the shadow crycb for each vcpu */
2353         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2354         kvm_s390_vcpu_unblock_all(kvm);
2355         mutex_unlock(&kvm->lock);
2356 }
2357 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2358
2359 static u64 kvm_s390_get_initial_cpuid(void)
2360 {
2361         struct cpuid cpuid;
2362
2363         get_cpu_id(&cpuid);
2364         cpuid.version = 0xff;
2365         return *((u64 *) &cpuid);
2366 }
2367
2368 static void kvm_s390_crypto_init(struct kvm *kvm)
2369 {
2370         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2371         kvm_s390_set_crycb_format(kvm);
2372
2373         if (!test_kvm_facility(kvm, 76))
2374                 return;
2375
2376         /* Enable AES/DEA protected key functions by default */
2377         kvm->arch.crypto.aes_kw = 1;
2378         kvm->arch.crypto.dea_kw = 1;
2379         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2380                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2381         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2382                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2383 }
2384
2385 static void sca_dispose(struct kvm *kvm)
2386 {
2387         if (kvm->arch.use_esca)
2388                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2389         else
2390                 free_page((unsigned long)(kvm->arch.sca));
2391         kvm->arch.sca = NULL;
2392 }
2393
2394 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2395 {
2396         gfp_t alloc_flags = GFP_KERNEL;
2397         int i, rc;
2398         char debug_name[16];
2399         static unsigned long sca_offset;
2400
2401         rc = -EINVAL;
2402 #ifdef CONFIG_KVM_S390_UCONTROL
2403         if (type & ~KVM_VM_S390_UCONTROL)
2404                 goto out_err;
2405         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2406                 goto out_err;
2407 #else
2408         if (type)
2409                 goto out_err;
2410 #endif
2411
2412         rc = s390_enable_sie();
2413         if (rc)
2414                 goto out_err;
2415
2416         rc = -ENOMEM;
2417
2418         if (!sclp.has_64bscao)
2419                 alloc_flags |= GFP_DMA;
2420         rwlock_init(&kvm->arch.sca_lock);
2421         /* start with basic SCA */
2422         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2423         if (!kvm->arch.sca)
2424                 goto out_err;
2425         mutex_lock(&kvm_lock);
2426         sca_offset += 16;
2427         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2428                 sca_offset = 0;
2429         kvm->arch.sca = (struct bsca_block *)
2430                         ((char *) kvm->arch.sca + sca_offset);
2431         mutex_unlock(&kvm_lock);
2432
2433         sprintf(debug_name, "kvm-%u", current->pid);
2434
2435         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2436         if (!kvm->arch.dbf)
2437                 goto out_err;
2438
2439         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2440         kvm->arch.sie_page2 =
2441              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2442         if (!kvm->arch.sie_page2)
2443                 goto out_err;
2444
2445         kvm->arch.sie_page2->kvm = kvm;
2446         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2447
2448         for (i = 0; i < kvm_s390_fac_size(); i++) {
2449                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2450                                               (kvm_s390_fac_base[i] |
2451                                                kvm_s390_fac_ext[i]);
2452                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2453                                               kvm_s390_fac_base[i];
2454         }
2455         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2456
2457         /* we are always in czam mode - even on pre z14 machines */
2458         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2459         set_kvm_facility(kvm->arch.model.fac_list, 138);
2460         /* we emulate STHYI in kvm */
2461         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2462         set_kvm_facility(kvm->arch.model.fac_list, 74);
2463         if (MACHINE_HAS_TLB_GUEST) {
2464                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2465                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2466         }
2467
2468         if (css_general_characteristics.aiv && test_facility(65))
2469                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2470
2471         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2472         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2473
2474         kvm_s390_crypto_init(kvm);
2475
2476         mutex_init(&kvm->arch.float_int.ais_lock);
2477         spin_lock_init(&kvm->arch.float_int.lock);
2478         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2479                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2480         init_waitqueue_head(&kvm->arch.ipte_wq);
2481         mutex_init(&kvm->arch.ipte_mutex);
2482
2483         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2484         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2485
2486         if (type & KVM_VM_S390_UCONTROL) {
2487                 kvm->arch.gmap = NULL;
2488                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2489         } else {
2490                 if (sclp.hamax == U64_MAX)
2491                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2492                 else
2493                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2494                                                     sclp.hamax + 1);
2495                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2496                 if (!kvm->arch.gmap)
2497                         goto out_err;
2498                 kvm->arch.gmap->private = kvm;
2499                 kvm->arch.gmap->pfault_enabled = 0;
2500         }
2501
2502         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2503         kvm->arch.use_skf = sclp.has_skey;
2504         spin_lock_init(&kvm->arch.start_stop_lock);
2505         kvm_s390_vsie_init(kvm);
2506         kvm_s390_gisa_init(kvm);
2507         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2508
2509         return 0;
2510 out_err:
2511         free_page((unsigned long)kvm->arch.sie_page2);
2512         debug_unregister(kvm->arch.dbf);
2513         sca_dispose(kvm);
2514         KVM_EVENT(3, "creation of vm failed: %d", rc);
2515         return rc;
2516 }
2517
2518 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2519 {
2520         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2521         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2522         kvm_s390_clear_local_irqs(vcpu);
2523         kvm_clear_async_pf_completion_queue(vcpu);
2524         if (!kvm_is_ucontrol(vcpu->kvm))
2525                 sca_del_vcpu(vcpu);
2526
2527         if (kvm_is_ucontrol(vcpu->kvm))
2528                 gmap_remove(vcpu->arch.gmap);
2529
2530         if (vcpu->kvm->arch.use_cmma)
2531                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2532         free_page((unsigned long)(vcpu->arch.sie_block));
2533
2534         kvm_vcpu_uninit(vcpu);
2535         kmem_cache_free(kvm_vcpu_cache, vcpu);
2536 }
2537
2538 static void kvm_free_vcpus(struct kvm *kvm)
2539 {
2540         unsigned int i;
2541         struct kvm_vcpu *vcpu;
2542
2543         kvm_for_each_vcpu(i, vcpu, kvm)
2544                 kvm_arch_vcpu_destroy(vcpu);
2545
2546         mutex_lock(&kvm->lock);
2547         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2548                 kvm->vcpus[i] = NULL;
2549
2550         atomic_set(&kvm->online_vcpus, 0);
2551         mutex_unlock(&kvm->lock);
2552 }
2553
2554 void kvm_arch_destroy_vm(struct kvm *kvm)
2555 {
2556         kvm_free_vcpus(kvm);
2557         sca_dispose(kvm);
2558         debug_unregister(kvm->arch.dbf);
2559         kvm_s390_gisa_destroy(kvm);
2560         free_page((unsigned long)kvm->arch.sie_page2);
2561         if (!kvm_is_ucontrol(kvm))
2562                 gmap_remove(kvm->arch.gmap);
2563         kvm_s390_destroy_adapters(kvm);
2564         kvm_s390_clear_float_irqs(kvm);
2565         kvm_s390_vsie_destroy(kvm);
2566         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2567 }
2568
2569 /* Section: vcpu related */
2570 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2571 {
2572         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2573         if (!vcpu->arch.gmap)
2574                 return -ENOMEM;
2575         vcpu->arch.gmap->private = vcpu->kvm;
2576
2577         return 0;
2578 }
2579
2580 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2581 {
2582         if (!kvm_s390_use_sca_entries())
2583                 return;
2584         read_lock(&vcpu->kvm->arch.sca_lock);
2585         if (vcpu->kvm->arch.use_esca) {
2586                 struct esca_block *sca = vcpu->kvm->arch.sca;
2587
2588                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2589                 sca->cpu[vcpu->vcpu_id].sda = 0;
2590         } else {
2591                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2592
2593                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2594                 sca->cpu[vcpu->vcpu_id].sda = 0;
2595         }
2596         read_unlock(&vcpu->kvm->arch.sca_lock);
2597 }
2598
2599 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2600 {
2601         if (!kvm_s390_use_sca_entries()) {
2602                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2603
2604                 /* we still need the basic sca for the ipte control */
2605                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2606                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2607                 return;
2608         }
2609         read_lock(&vcpu->kvm->arch.sca_lock);
2610         if (vcpu->kvm->arch.use_esca) {
2611                 struct esca_block *sca = vcpu->kvm->arch.sca;
2612
2613                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2614                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2615                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2616                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2617                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2618         } else {
2619                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2620
2621                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2622                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2623                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2624                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2625         }
2626         read_unlock(&vcpu->kvm->arch.sca_lock);
2627 }
2628
2629 /* Basic SCA to Extended SCA data copy routines */
2630 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2631 {
2632         d->sda = s->sda;
2633         d->sigp_ctrl.c = s->sigp_ctrl.c;
2634         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2635 }
2636
2637 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2638 {
2639         int i;
2640
2641         d->ipte_control = s->ipte_control;
2642         d->mcn[0] = s->mcn;
2643         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2644                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2645 }
2646
2647 static int sca_switch_to_extended(struct kvm *kvm)
2648 {
2649         struct bsca_block *old_sca = kvm->arch.sca;
2650         struct esca_block *new_sca;
2651         struct kvm_vcpu *vcpu;
2652         unsigned int vcpu_idx;
2653         u32 scaol, scaoh;
2654
2655         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2656         if (!new_sca)
2657                 return -ENOMEM;
2658
2659         scaoh = (u32)((u64)(new_sca) >> 32);
2660         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2661
2662         kvm_s390_vcpu_block_all(kvm);
2663         write_lock(&kvm->arch.sca_lock);
2664
2665         sca_copy_b_to_e(new_sca, old_sca);
2666
2667         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2668                 vcpu->arch.sie_block->scaoh = scaoh;
2669                 vcpu->arch.sie_block->scaol = scaol;
2670                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2671         }
2672         kvm->arch.sca = new_sca;
2673         kvm->arch.use_esca = 1;
2674
2675         write_unlock(&kvm->arch.sca_lock);
2676         kvm_s390_vcpu_unblock_all(kvm);
2677
2678         free_page((unsigned long)old_sca);
2679
2680         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2681                  old_sca, kvm->arch.sca);
2682         return 0;
2683 }
2684
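     /*
      * Check whether a vcpu with the given id fits into the current SCA,
      * switching from the basic to the extended SCA if the id requires it
      * and the SCLP facilities (esca, 64bscao) allow it.
      */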
2685 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2686 {
2687         int rc;
2688
2689         if (!kvm_s390_use_sca_entries()) {
2690                 if (id < KVM_MAX_VCPUS)
2691                         return true;
2692                 return false;
2693         }
2694         if (id < KVM_S390_BSCA_CPU_SLOTS)
2695                 return true;
2696         if (!sclp.has_esca || !sclp.has_64bscao)
2697                 return false;
2698
2699         mutex_lock(&kvm->lock);
2700         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2701         mutex_unlock(&kvm->lock);
2702
2703         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2704 }
2705
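     /*
      * Announce which register sets are synchronized via the kvm_run area;
      * the optional sets (riccb, bpbc, gscb, etoken and vrs vs. fprs)
      * depend on the facilities of this VM and on MACHINE_HAS_VX.
      */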
2706 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2707 {
2708         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2709         kvm_clear_async_pf_completion_queue(vcpu);
2710         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2711                                     KVM_SYNC_GPRS |
2712                                     KVM_SYNC_ACRS |
2713                                     KVM_SYNC_CRS |
2714                                     KVM_SYNC_ARCH0 |
2715                                     KVM_SYNC_PFAULT;
2716         kvm_s390_set_prefix(vcpu, 0);
2717         if (test_kvm_facility(vcpu->kvm, 64))
2718                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2719         if (test_kvm_facility(vcpu->kvm, 82))
2720                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2721         if (test_kvm_facility(vcpu->kvm, 133))
2722                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2723         if (test_kvm_facility(vcpu->kvm, 156))
2724                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2725         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2726          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2727          */
2728         if (MACHINE_HAS_VX)
2729                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2730         else
2731                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2732
2733         if (kvm_is_ucontrol(vcpu->kvm))
2734                 return __kvm_ucontrol_vcpu_init(vcpu);
2735
2736         return 0;
2737 }
2738
2739 /* must be called with preemption disabled to protect from TOD sync and vcpu_load/put */
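     /*
      * The cpu timer accounting helpers below charge guest SIE time to the
      * sie_block cpu timer; cputm_start records when accounting began and
      * cputm_seqcount lets readers on other cpus see a consistent value.
      */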
2740 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2741 {
2742         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2743         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2744         vcpu->arch.cputm_start = get_tod_clock_fast();
2745         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2746 }
2747
2748 /* must be called with preemption disabled to protect from TOD sync and vcpu_load/put */
2749 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2750 {
2751         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2752         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2753         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2754         vcpu->arch.cputm_start = 0;
2755         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2756 }
2757
2758 /* must be called with preemption disabled to protect from TOD sync and vcpu_load/put */
2759 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2760 {
2761         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2762         vcpu->arch.cputm_enabled = true;
2763         __start_cpu_timer_accounting(vcpu);
2764 }
2765
2766 /* must be called with preemption disabled to protect from TOD sync and vcpu_load/put */
2767 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2768 {
2769         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2770         __stop_cpu_timer_accounting(vcpu);
2771         vcpu->arch.cputm_enabled = false;
2772 }
2773
2774 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2775 {
2776         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2777         __enable_cpu_timer_accounting(vcpu);
2778         preempt_enable();
2779 }
2780
2781 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2782 {
2783         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2784         __disable_cpu_timer_accounting(vcpu);
2785         preempt_enable();
2786 }
2787
2788 /* set the cpu timer - may only be called from the VCPU thread itself */
2789 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2790 {
2791         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2792         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2793         if (vcpu->arch.cputm_enabled)
2794                 vcpu->arch.cputm_start = get_tod_clock_fast();
2795         vcpu->arch.sie_block->cputm = cputm;
2796         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2797         preempt_enable();
2798 }
2799
2800 /* update and get the cpu timer - can also be called from other VCPU threads */
2801 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2802 {
2803         unsigned int seq;
2804         __u64 value;
2805
2806         if (unlikely(!vcpu->arch.cputm_enabled))
2807                 return vcpu->arch.sie_block->cputm;
2808
2809         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2810         do {
2811                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2812                 /*
2813                  * If the writer would ever execute a read in the critical
2814                  * section, e.g. in irq context, we have a deadlock.
2815                  */
2816                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2817                 value = vcpu->arch.sie_block->cputm;
2818                 /* if cputm_start is 0, accounting is being started/stopped */
2819                 if (likely(vcpu->arch.cputm_start))
2820                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2821         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2822         preempt_enable();
2823         return value;
2824 }
2825
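     /*
      * Called when the vcpu is scheduled in on a host cpu: enable the guest
      * address space (gmap), set CPUSTAT_RUNNING and resume cpu timer
      * accounting unless the vcpu is idle.
      */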
2826 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2827 {
2828
2829         gmap_enable(vcpu->arch.enabled_gmap);
2830         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2831         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2832                 __start_cpu_timer_accounting(vcpu);
2833         vcpu->cpu = cpu;
2834 }
2835
2836 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2837 {
2838         vcpu->cpu = -1;
2839         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2840                 __stop_cpu_timer_accounting(vcpu);
2841         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2842         vcpu->arch.enabled_gmap = gmap_get_enabled();
2843         gmap_disable(vcpu->arch.enabled_gmap);
2844
2845 }
2846
2847 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2848 {
2849         /* this equals the initial cpu reset in the POP, but we don't switch to ESA */
2850         vcpu->arch.sie_block->gpsw.mask = 0UL;
2851         vcpu->arch.sie_block->gpsw.addr = 0UL;
2852         kvm_s390_set_prefix(vcpu, 0);
2853         kvm_s390_set_cpu_timer(vcpu, 0);
2854         vcpu->arch.sie_block->ckc       = 0UL;
2855         vcpu->arch.sie_block->todpr     = 0;
2856         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2857         vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2858                                         CR0_INTERRUPT_KEY_SUBMASK |
2859                                         CR0_MEASUREMENT_ALERT_SUBMASK;
2860         vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2861                                         CR14_UNUSED_33 |
2862                                         CR14_EXTERNAL_DAMAGE_SUBMASK;
2863         /* make sure the new fpc will be lazily loaded */
2864         save_fpu_regs();
2865         current->thread.fpu.fpc = 0;
2866         vcpu->arch.sie_block->gbea = 1;
2867         vcpu->arch.sie_block->pp = 0;
2868         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2869         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2870         kvm_clear_async_pf_completion_queue(vcpu);
2871         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2872                 kvm_s390_vcpu_stop(vcpu);
2873         kvm_s390_clear_local_irqs(vcpu);
2874 }
2875
2876 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2877 {
2878         mutex_lock(&vcpu->kvm->lock);
2879         preempt_disable();
2880         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2881         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2882         preempt_enable();
2883         mutex_unlock(&vcpu->kvm->lock);
2884         if (!kvm_is_ucontrol(vcpu->kvm)) {
2885                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2886                 sca_add_vcpu(vcpu);
2887         }
2888         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2889                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2890         /* make vcpu_load load the right gmap on the first trigger */
2891         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2892 }
2893
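     /*
      * A PCKMO subfunction is usable by the guest only if it is offered by
      * both the configured cpu model and the host.
      */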
2894 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
2895 {
2896         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
2897             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
2898                 return true;
2899         return false;
2900 }
2901
2902 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
2903 {
2904         /* At least one ECC subfunction must be present */
2905         return kvm_has_pckmo_subfunc(kvm, 32) ||
2906                kvm_has_pckmo_subfunc(kvm, 33) ||
2907                kvm_has_pckmo_subfunc(kvm, 34) ||
2908                kvm_has_pckmo_subfunc(kvm, 40) ||
2909                kvm_has_pckmo_subfunc(kvm, 41);
2910
2911 }
2912
2913 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2914 {
2915         /*
2916          * If the AP instructions are not being interpreted and the MSAX3
2917          * facility is not configured for the guest, there is nothing to set up.
2918          */
2919         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2920                 return;
2921
2922         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2923         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2924         vcpu->arch.sie_block->eca &= ~ECA_APIE;
2925         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
2926
2927         if (vcpu->kvm->arch.crypto.apie)
2928                 vcpu->arch.sie_block->eca |= ECA_APIE;
2929
2930         /* Set up protected key support */
2931         if (vcpu->kvm->arch.crypto.aes_kw) {
2932                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2933                 /* ecc is also wrapped with AES key */
2934                 if (kvm_has_pckmo_ecc(vcpu->kvm))
2935                         vcpu->arch.sie_block->ecd |= ECD_ECC;
2936         }
2937
2938         if (vcpu->kvm->arch.crypto.dea_kw)
2939                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2940 }
2941
2942 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2943 {
2944         free_page(vcpu->arch.sie_block->cbrlo);
2945         vcpu->arch.sie_block->cbrlo = 0;
2946 }
2947
2948 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2949 {
2950         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2951         if (!vcpu->arch.sie_block->cbrlo)
2952                 return -ENOMEM;
2953         return 0;
2954 }
2955
2956 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2957 {
2958         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2959
2960         vcpu->arch.sie_block->ibc = model->ibc;
2961         if (test_kvm_facility(vcpu->kvm, 7))
2962                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2963 }
2964
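     /*
      * Set up the SIE block for a new vcpu: initial cpuflags, the execution
      * control bits (ecb/ecb2/eca/ecd) derived from the available facilities
      * and SCLP features, the CMMA buffer if needed, the clock comparator
      * timer and the crypto control block.
      */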
2965 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2966 {
2967         int rc = 0;
2968
2969         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2970                                                     CPUSTAT_SM |
2971                                                     CPUSTAT_STOPPED);
2972
2973         if (test_kvm_facility(vcpu->kvm, 78))
2974                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2975         else if (test_kvm_facility(vcpu->kvm, 8))
2976                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2977
2978         kvm_s390_vcpu_setup_model(vcpu);
2979
2980         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2981         if (MACHINE_HAS_ESOP)
2982                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2983         if (test_kvm_facility(vcpu->kvm, 9))
2984                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2985         if (test_kvm_facility(vcpu->kvm, 73))
2986                 vcpu->arch.sie_block->ecb |= ECB_TE;
2987
2988         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2989                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2990         if (test_kvm_facility(vcpu->kvm, 130))
2991                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2992         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2993         if (sclp.has_cei)
2994                 vcpu->arch.sie_block->eca |= ECA_CEI;
2995         if (sclp.has_ib)
2996                 vcpu->arch.sie_block->eca |= ECA_IB;
2997         if (sclp.has_siif)
2998                 vcpu->arch.sie_block->eca |= ECA_SII;
2999         if (sclp.has_sigpif)
3000                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3001         if (test_kvm_facility(vcpu->kvm, 129)) {
3002                 vcpu->arch.sie_block->eca |= ECA_VX;
3003                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3004         }
3005         if (test_kvm_facility(vcpu->kvm, 139))
3006                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3007         if (test_kvm_facility(vcpu->kvm, 156))
3008                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3009         if (vcpu->arch.sie_block->gd) {
3010                 vcpu->arch.sie_block->eca |= ECA_AIV;
3011                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3012                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3013         }
3014         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3015                                         | SDNXC;
3016         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3017
3018         if (sclp.has_kss)
3019                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3020         else
3021                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3022
3023         if (vcpu->kvm->arch.use_cmma) {
3024                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3025                 if (rc)
3026                         return rc;
3027         }
3028         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3029         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3030
3031         vcpu->arch.sie_block->hpid = HPID_KVM;
3032
3033         kvm_s390_vcpu_crypto_setup(vcpu);
3034
3035         return rc;
3036 }
3037
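     /*
      * Allocate the vcpu together with one zeroed page holding its SIE
      * control block and itdb, hook it up to the per-VM GISA if one is
      * present, and run the common kvm_vcpu_init().
      */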
3038 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
3039                                       unsigned int id)
3040 {
3041         struct kvm_vcpu *vcpu;
3042         struct sie_page *sie_page;
3043         int rc = -EINVAL;
3044
3045         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3046                 goto out;
3047
3048         rc = -ENOMEM;
3049
3050         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
3051         if (!vcpu)
3052                 goto out;
3053
3054         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3055         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3056         if (!sie_page)
3057                 goto out_free_cpu;
3058
3059         vcpu->arch.sie_block = &sie_page->sie_block;
3060         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3061
3062         /* the real guest size will always be smaller than msl */
3063         vcpu->arch.sie_block->mso = 0;
3064         vcpu->arch.sie_block->msl = sclp.hamax;
3065
3066         vcpu->arch.sie_block->icpua = id;
3067         spin_lock_init(&vcpu->arch.local_int.lock);
3068         vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin;
3069         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3070                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3071         seqcount_init(&vcpu->arch.cputm_seqcount);
3072
3073         rc = kvm_vcpu_init(vcpu, kvm, id);
3074         if (rc)
3075                 goto out_free_sie_block;
3076         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
3077                  vcpu->arch.sie_block);
3078         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
3079
3080         return vcpu;
3081 out_free_sie_block:
3082         free_page((unsigned long)(vcpu->arch.sie_block));
3083 out_free_cpu:
3084         kmem_cache_free(kvm_vcpu_cache, vcpu);
3085 out:
3086         return ERR_PTR(rc);
3087 }
3088
3089 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3090 {
3091         return kvm_s390_vcpu_has_irq(vcpu, 0);
3092 }
3093
3094 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3095 {
3096         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3097 }
3098
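     /*
      * Setting PROG_BLOCK_SIE prevents the vcpu from re-entering SIE (see
      * kvm_s390_vcpu_sie_inhibited()); exit_sie() kicks it out of a running
      * SIE.
      */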
3099 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3100 {
3101         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3102         exit_sie(vcpu);
3103 }
3104
3105 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3106 {
3107         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3108 }
3109
3110 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3111 {
3112         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3113         exit_sie(vcpu);
3114 }
3115
3116 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3117 {
3118         return atomic_read(&vcpu->arch.sie_block->prog20) &
3119                (PROG_BLOCK_SIE | PROG_REQUEST);
3120 }
3121
3122 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3123 {
3124         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3125 }
3126
3127 /*
3128  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3129  * If the CPU is not running (e.g. waiting as idle) it returns immediately.
3130  */
3131 void exit_sie(struct kvm_vcpu *vcpu)
3132 {
3133         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3134         kvm_s390_vsie_kick(vcpu);
3135         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3136                 cpu_relax();
3137 }
3138
3139 /* Kick a guest cpu out of SIE to process a request synchronously */
3140 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3141 {
3142         kvm_make_request(req, vcpu);
3143         kvm_s390_vcpu_request(vcpu);
3144 }
3145
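     /*
      * gmap invalidation notifier: when a range that may contain a vcpu's
      * prefix pages is invalidated, request an MMU reload for that vcpu so
      * the prefix is re-protected before the next SIE entry.
      */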
3146 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3147                               unsigned long end)
3148 {
3149         struct kvm *kvm = gmap->private;
3150         struct kvm_vcpu *vcpu;
3151         unsigned long prefix;
3152         int i;
3153
3154         if (gmap_is_shadow(gmap))
3155                 return;
3156         if (start >= 1UL << 31)
3157                 /* We are only interested in prefix pages */
3158                 return;
3159         kvm_for_each_vcpu(i, vcpu, kvm) {
3160                 /* match against both prefix pages */
3161                 prefix = kvm_s390_get_prefix(vcpu);
3162                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3163                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3164                                    start, end);
3165                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3166                 }
3167         }
3168 }
3169
3170 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3171 {
3172         /* do not poll with more than halt_poll_max_steal percent of steal time */
3173         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3174             halt_poll_max_steal) {
3175                 vcpu->stat.halt_no_poll_steal++;
3176                 return true;
3177         }
3178         return false;
3179 }
3180
3181 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3182 {
3183         /* kvm common code refers to this, but never calls it */
3184         BUG();
3185         return 0;
3186 }
3187
3188 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3189                                            struct kvm_one_reg *reg)
3190 {
3191         int r = -EINVAL;
3192
3193         switch (reg->id) {
3194         case KVM_REG_S390_TODPR:
3195                 r = put_user(vcpu->arch.sie_block->todpr,
3196                              (u32 __user *)reg->addr);
3197                 break;
3198         case KVM_REG_S390_EPOCHDIFF:
3199                 r = put_user(vcpu->arch.sie_block->epoch,
3200                              (u64 __user *)reg->addr);
3201                 break;
3202         case KVM_REG_S390_CPU_TIMER:
3203                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3204                              (u64 __user *)reg->addr);
3205                 break;
3206         case KVM_REG_S390_CLOCK_COMP:
3207                 r = put_user(vcpu->arch.sie_block->ckc,
3208                              (u64 __user *)reg->addr);
3209                 break;
3210         case KVM_REG_S390_PFTOKEN:
3211                 r = put_user(vcpu->arch.pfault_token,
3212                              (u64 __user *)reg->addr);
3213                 break;
3214         case KVM_REG_S390_PFCOMPARE:
3215                 r = put_user(vcpu->arch.pfault_compare,
3216                              (u64 __user *)reg->addr);
3217                 break;
3218         case KVM_REG_S390_PFSELECT:
3219                 r = put_user(vcpu->arch.pfault_select,
3220                              (u64 __user *)reg->addr);
3221                 break;
3222         case KVM_REG_S390_PP:
3223                 r = put_user(vcpu->arch.sie_block->pp,
3224                              (u64 __user *)reg->addr);
3225                 break;
3226         case KVM_REG_S390_GBEA:
3227                 r = put_user(vcpu->arch.sie_block->gbea,
3228                              (u64 __user *)reg->addr);
3229                 break;
3230         default:
3231                 break;
3232         }
3233
3234         return r;
3235 }
3236
3237 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3238                                            struct kvm_one_reg *reg)
3239 {
3240         int r = -EINVAL;
3241         __u64 val;
3242
3243         switch (reg->id) {
3244         case KVM_REG_S390_TODPR:
3245                 r = get_user(vcpu->arch.sie_block->todpr,
3246                              (u32 __user *)reg->addr);
3247                 break;
3248         case KVM_REG_S390_EPOCHDIFF:
3249                 r = get_user(vcpu->arch.sie_block->epoch,
3250                              (u64 __user *)reg->addr);
3251                 break;
3252         case KVM_REG_S390_CPU_TIMER:
3253                 r = get_user(val, (u64 __user *)reg->addr);
3254                 if (!r)
3255                         kvm_s390_set_cpu_timer(vcpu, val);
3256                 break;
3257         case KVM_REG_S390_CLOCK_COMP:
3258                 r = get_user(vcpu->arch.sie_block->ckc,
3259                              (u64 __user *)reg->addr);
3260                 break;
3261         case KVM_REG_S390_PFTOKEN:
3262                 r = get_user(vcpu->arch.pfault_token,
3263                              (u64 __user *)reg->addr);
3264                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3265                         kvm_clear_async_pf_completion_queue(vcpu);
3266                 break;
3267         case KVM_REG_S390_PFCOMPARE:
3268                 r = get_user(vcpu->arch.pfault_compare,
3269                              (u64 __user *)reg->addr);
3270                 break;
3271         case KVM_REG_S390_PFSELECT:
3272                 r = get_user(vcpu->arch.pfault_select,
3273                              (u64 __user *)reg->addr);
3274                 break;
3275         case KVM_REG_S390_PP:
3276                 r = get_user(vcpu->arch.sie_block->pp,
3277                              (u64 __user *)reg->addr);
3278                 break;
3279         case KVM_REG_S390_GBEA:
3280                 r = get_user(vcpu->arch.sie_block->gbea,
3281                              (u64 __user *)reg->addr);
3282                 break;
3283         default:
3284                 break;
3285         }
3286
3287         return r;
3288 }
3289
3290 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3291 {
3292         kvm_s390_vcpu_initial_reset(vcpu);
3293         return 0;
3294 }
3295
3296 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3297 {
3298         vcpu_load(vcpu);
3299         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3300         vcpu_put(vcpu);
3301         return 0;
3302 }
3303
3304 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3305 {
3306         vcpu_load(vcpu);
3307         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3308         vcpu_put(vcpu);
3309         return 0;
3310 }
3311
3312 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3313                                   struct kvm_sregs *sregs)
3314 {
3315         vcpu_load(vcpu);
3316
3317         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3318         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3319
3320         vcpu_put(vcpu);
3321         return 0;
3322 }
3323
3324 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3325                                   struct kvm_sregs *sregs)
3326 {
3327         vcpu_load(vcpu);
3328
3329         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3330         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3331
3332         vcpu_put(vcpu);
3333         return 0;
3334 }
3335
3336 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3337 {
3338         int ret = 0;
3339
3340         vcpu_load(vcpu);
3341
3342         if (test_fp_ctl(fpu->fpc)) {
3343                 ret = -EINVAL;
3344                 goto out;
3345         }
3346         vcpu->run->s.regs.fpc = fpu->fpc;
3347         if (MACHINE_HAS_VX)
3348                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3349                                  (freg_t *) fpu->fprs);
3350         else
3351                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3352
3353 out:
3354         vcpu_put(vcpu);
3355         return ret;
3356 }
3357
3358 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3359 {
3360         vcpu_load(vcpu);
3361
3362         /* make sure we have the latest values */
3363         save_fpu_regs();
3364         if (MACHINE_HAS_VX)
3365                 convert_vx_to_fp((freg_t *) fpu->fprs,
3366                                  (__vector128 *) vcpu->run->s.regs.vrs);
3367         else
3368                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3369         fpu->fpc = vcpu->run->s.regs.fpc;
3370
3371         vcpu_put(vcpu);
3372         return 0;
3373 }
3374
3375 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3376 {
3377         int rc = 0;
3378
3379         if (!is_vcpu_stopped(vcpu))
3380                 rc = -EBUSY;
3381         else {
3382                 vcpu->run->psw_mask = psw.mask;
3383                 vcpu->run->psw_addr = psw.addr;
3384         }
3385         return rc;
3386 }
3387
3388 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3389                                   struct kvm_translation *tr)
3390 {
3391         return -EINVAL; /* not implemented yet */
3392 }
3393
3394 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3395                               KVM_GUESTDBG_USE_HW_BP | \
3396                               KVM_GUESTDBG_ENABLE)
3397
3398 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3399                                         struct kvm_guest_debug *dbg)
3400 {
3401         int rc = 0;
3402
3403         vcpu_load(vcpu);
3404
3405         vcpu->guest_debug = 0;
3406         kvm_s390_clear_bp_data(vcpu);
3407
3408         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3409                 rc = -EINVAL;
3410                 goto out;
3411         }
3412         if (!sclp.has_gpere) {
3413                 rc = -EINVAL;
3414                 goto out;
3415         }
3416
3417         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3418                 vcpu->guest_debug = dbg->control;
3419                 /* enforce guest PER */
3420                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3421
3422                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3423                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3424         } else {
3425                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3426                 vcpu->arch.guestdbg.last_bp = 0;
3427         }
3428
3429         if (rc) {
3430                 vcpu->guest_debug = 0;
3431                 kvm_s390_clear_bp_data(vcpu);
3432                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3433         }
3434
3435 out:
3436         vcpu_put(vcpu);
3437         return rc;
3438 }
3439
3440 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3441                                     struct kvm_mp_state *mp_state)
3442 {
3443         int ret;
3444
3445         vcpu_load(vcpu);
3446
3447         /* CHECK_STOP and LOAD are not supported yet */
3448         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3449                                       KVM_MP_STATE_OPERATING;
3450
3451         vcpu_put(vcpu);
3452         return ret;
3453 }
3454
3455 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3456                                     struct kvm_mp_state *mp_state)
3457 {
3458         int rc = 0;
3459
3460         vcpu_load(vcpu);
3461
3462         /* user space knows about this interface - let it control the state */
3463         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3464
3465         switch (mp_state->mp_state) {
3466         case KVM_MP_STATE_STOPPED:
3467                 kvm_s390_vcpu_stop(vcpu);
3468                 break;
3469         case KVM_MP_STATE_OPERATING:
3470                 kvm_s390_vcpu_start(vcpu);
3471                 break;
3472         case KVM_MP_STATE_LOAD:
3473         case KVM_MP_STATE_CHECK_STOP:
3474                 /* fall through - CHECK_STOP and LOAD are not supported yet */
3475         default:
3476                 rc = -ENXIO;
3477         }
3478
3479         vcpu_put(vcpu);
3480         return rc;
3481 }
3482
3483 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3484 {
3485         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3486 }
3487
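     /*
      * Process pending vcpu requests (prefix re-protection, TLB flush, IBS
      * enable/disable, operation exception interception, CMM toggling for
      * migration) before re-entering SIE.
      */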
3488 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3489 {
3490 retry:
3491         kvm_s390_vcpu_request_handled(vcpu);
3492         if (!kvm_request_pending(vcpu))
3493                 return 0;
3494         /*
3495          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3496          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3497          * This ensures that the ipte instruction for this request has
3498          * already finished. We might race against a second unmapper that
3499          * wants to set the blocking bit. Let's just retry the request loop.
3500          */
3501         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3502                 int rc;
3503                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3504                                           kvm_s390_get_prefix(vcpu),
3505                                           PAGE_SIZE * 2, PROT_WRITE);
3506                 if (rc) {
3507                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3508                         return rc;
3509                 }
3510                 goto retry;
3511         }
3512
3513         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3514                 vcpu->arch.sie_block->ihcpu = 0xffff;
3515                 goto retry;
3516         }
3517
3518         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3519                 if (!ibs_enabled(vcpu)) {
3520                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3521                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3522                 }
3523                 goto retry;
3524         }
3525
3526         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3527                 if (ibs_enabled(vcpu)) {
3528                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3529                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3530                 }
3531                 goto retry;
3532         }
3533
3534         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3535                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3536                 goto retry;
3537         }
3538
3539         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3540                 /*
3541                  * Disable CMM virtualization; we will emulate the ESSA
3542                  * instruction manually, in order to provide additional
3543                  * functionalities needed for live migration.
3544                  */
3545                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3546                 goto retry;
3547         }
3548
3549         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3550                 /*
3551                  * Re-enable CMM virtualization if CMMA is available and
3552                  * CMM has been used.
3553                  */
3554                 if ((vcpu->kvm->arch.use_cmma) &&
3555                     (vcpu->kvm->mm->context.uses_cmm))
3556                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3557                 goto retry;
3558         }
3559
3560         /* nothing to do, just clear the request */
3561         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3562         /* we left the vsie handler, nothing to do, just clear the request */
3563         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3564
3565         return 0;
3566 }
3567
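     /*
      * Set the guest TOD clock: compute the epoch (and the epoch index when
      * the multiple-epoch facility 139 is available) relative to the host
      * TOD and propagate it to every vcpu while all of them are blocked.
      */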
3568 void kvm_s390_set_tod_clock(struct kvm *kvm,
3569                             const struct kvm_s390_vm_tod_clock *gtod)
3570 {
3571         struct kvm_vcpu *vcpu;
3572         struct kvm_s390_tod_clock_ext htod;
3573         int i;
3574
3575         mutex_lock(&kvm->lock);
3576         preempt_disable();
3577
3578         get_tod_clock_ext((char *)&htod);
3579
3580         kvm->arch.epoch = gtod->tod - htod.tod;
3581         kvm->arch.epdx = 0;
3582         if (test_kvm_facility(kvm, 139)) {
3583                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3584                 if (kvm->arch.epoch > gtod->tod)
3585                         kvm->arch.epdx -= 1;
3586         }
3587
3588         kvm_s390_vcpu_block_all(kvm);
3589         kvm_for_each_vcpu(i, vcpu, kvm) {
3590                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3591                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3592         }
3593
3594         kvm_s390_vcpu_unblock_all(kvm);
3595         preempt_enable();
3596         mutex_unlock(&kvm->lock);
3597 }
3598
3599 /**
3600  * kvm_arch_fault_in_page - fault-in guest page if necessary
3601  * @vcpu: The corresponding virtual cpu
3602  * @gpa: Guest physical address
3603  * @writable: Whether the page should be writable or not
3604  *
3605  * Make sure that a guest page has been faulted-in on the host.
3606  *
3607  * Return: Zero on success, negative error code otherwise.
3608  */
3609 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3610 {
3611         return gmap_fault(vcpu->arch.gmap, gpa,
3612                           writable ? FAULT_FLAG_WRITE : 0);
3613 }
3614
3615 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3616                                       unsigned long token)
3617 {
3618         struct kvm_s390_interrupt inti;
3619         struct kvm_s390_irq irq;
3620
3621         if (start_token) {
3622                 irq.u.ext.ext_params2 = token;
3623                 irq.type = KVM_S390_INT_PFAULT_INIT;
3624                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3625         } else {
3626                 inti.type = KVM_S390_INT_PFAULT_DONE;
3627                 inti.parm64 = token;
3628                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3629         }
3630 }
3631
3632 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3633                                      struct kvm_async_pf *work)
3634 {
3635         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3636         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3637 }
3638
3639 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3640                                  struct kvm_async_pf *work)
3641 {
3642         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3643         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3644 }
3645
3646 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3647                                struct kvm_async_pf *work)
3648 {
3649         /* s390 will always inject the page directly */
3650 }
3651
3652 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3653 {
3654         /*
3655          * s390 will always inject the page directly,
3656          * but we still want check_async_completion to clean up
3657          */
3658         return true;
3659 }
3660
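     /*
      * Only set up an async page fault if the guest has pfault enabled and
      * can currently take the interrupt (the PSW matches the pfault
      * select/compare masks, external interrupts and the service-signal
      * subclass are enabled).
      */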
3661 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3662 {
3663         hva_t hva;
3664         struct kvm_arch_async_pf arch;
3665         int rc;
3666
3667         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3668                 return 0;
3669         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3670             vcpu->arch.pfault_compare)
3671                 return 0;
3672         if (psw_extint_disabled(vcpu))
3673                 return 0;
3674         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3675                 return 0;
3676         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3677                 return 0;
3678         if (!vcpu->arch.gmap->pfault_enabled)
3679                 return 0;
3680
3681         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3682         hva += current->thread.gmap_addr & ~PAGE_MASK;
3683         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3684                 return 0;
3685
3686         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3687         return rc;
3688 }
3689
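     /*
      * Work done before every SIE entry: async pfault housekeeping, pending
      * machine checks and rescheduling, interrupt delivery, request
      * handling and guest debug PER setup.
      */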
3690 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3691 {
3692         int rc, cpuflags;
3693
3694         /*
3695          * On s390 notifications for arriving pages will be delivered directly
3696          * to the guest, but the housekeeping for completed pfaults is
3697          * handled outside the worker.
3698          */
3699         kvm_check_async_pf_completion(vcpu);
3700
3701         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3702         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3703
3704         if (need_resched())
3705                 schedule();
3706
3707         if (test_cpu_flag(CIF_MCCK_PENDING))
3708                 s390_handle_mcck();
3709
3710         if (!kvm_is_ucontrol(vcpu->kvm)) {
3711                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3712                 if (rc)
3713                         return rc;
3714         }
3715
3716         rc = kvm_s390_handle_requests(vcpu);
3717         if (rc)
3718                 return rc;
3719
3720         if (guestdbg_enabled(vcpu)) {
3721                 kvm_s390_backup_guest_per_regs(vcpu);
3722                 kvm_s390_patch_guest_per_regs(vcpu);
3723         }
3724
3725         clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
3726
3727         vcpu->arch.sie_block->icptcode = 0;
3728         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3729         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3730         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3731
3732         return 0;
3733 }
3734
3735 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3736 {
3737         struct kvm_s390_pgm_info pgm_info = {
3738                 .code = PGM_ADDRESSING,
3739         };
3740         u8 opcode, ilen;
3741         int rc;
3742
3743         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3744         trace_kvm_s390_sie_fault(vcpu);
3745
3746         /*
3747          * We want to inject an addressing exception, which is defined as a
3748          * suppressing or terminating exception. However, since we came here
3749          * by a DAT access exception, the PSW still points to the faulting
3750          * instruction since DAT exceptions are nullifying. So we've got
3751          * to look up the current opcode to get the length of the instruction
3752          * to be able to forward the PSW.
3753          */
3754         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3755         ilen = insn_length(opcode);
3756         if (rc < 0) {
3757                 return rc;
3758         } else if (rc) {
3759                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3760                  * Forward by arbitrary ilc, injection will take care of
3761                  * nullification if necessary.
3762                  */
3763                 pgm_info = vcpu->arch.pgm;
3764                 ilen = 4;
3765         }
3766         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3767         kvm_s390_forward_psw(vcpu, ilen);
3768         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3769 }
3770
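     /*
      * Handle the result of a SIE exit: reinject host machine checks,
      * dispatch intercepts, report unhandled intercepts or ucontrol faults
      * to userspace, or resolve guest page faults (possibly asynchronously).
      */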
3771 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3772 {
3773         struct mcck_volatile_info *mcck_info;
3774         struct sie_page *sie_page;
3775
3776         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3777                    vcpu->arch.sie_block->icptcode);
3778         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3779
3780         if (guestdbg_enabled(vcpu))
3781                 kvm_s390_restore_guest_per_regs(vcpu);
3782
3783         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3784         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3785
3786         if (exit_reason == -EINTR) {
3787                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3788                 sie_page = container_of(vcpu->arch.sie_block,
3789                                         struct sie_page, sie_block);
3790                 mcck_info = &sie_page->mcck_info;
3791                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3792                 return 0;
3793         }
3794
3795         if (vcpu->arch.sie_block->icptcode > 0) {
3796                 int rc = kvm_handle_sie_intercept(vcpu);
3797
3798                 if (rc != -EOPNOTSUPP)
3799                         return rc;
3800                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3801                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3802                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3803                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3804                 return -EREMOTE;
3805         } else if (exit_reason != -EFAULT) {
3806                 vcpu->stat.exit_null++;
3807                 return 0;
3808         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3809                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3810                 vcpu->run->s390_ucontrol.trans_exc_code =
3811                                                 current->thread.gmap_addr;
3812                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3813                 return -EREMOTE;
3814         } else if (current->thread.gmap_pfault) {
3815                 trace_kvm_s390_major_guest_pfault(vcpu);
3816                 current->thread.gmap_pfault = 0;
3817                 if (kvm_arch_setup_async_pf(vcpu))
3818                         return 0;
3819                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3820         }
3821         return vcpu_post_run_fault_in_sie(vcpu);
3822 }
3823
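     /*
      * The inner run loop: repeat pre-run checks, the actual SIE entry with
      * interrupts disabled and guest context accounting, and post-run
      * handling until a signal, a guest debug exit or an error stops it.
      */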
3824 static int __vcpu_run(struct kvm_vcpu *vcpu)
3825 {
3826         int rc, exit_reason;
3827
3828         /*
3829          * We try to hold kvm->srcu during most of vcpu_run (except when
3830          * running the guest), so that memslots (and other stuff) are protected
3831          */
3832         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3833
3834         do {
3835                 rc = vcpu_pre_run(vcpu);
3836                 if (rc)
3837                         break;
3838
3839                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3840                 /*
3841                  * As PF_VCPU will be used in the fault handler, there must be
3842                  * no uaccess between guest_enter and guest_exit.
3843                  */
3844                 local_irq_disable();
3845                 guest_enter_irqoff();
3846                 __disable_cpu_timer_accounting(vcpu);
3847                 local_irq_enable();
3848                 exit_reason = sie64a(vcpu->arch.sie_block,
3849                                      vcpu->run->s.regs.gprs);
3850                 local_irq_disable();
3851                 __enable_cpu_timer_accounting(vcpu);
3852                 guest_exit_irqoff();
3853                 local_irq_enable();
3854                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3855
3856                 rc = vcpu_post_run(vcpu, exit_reason);
3857         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3858
3859         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3860         return rc;
3861 }
3862
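     /*
      * Transfer the register state that userspace marked dirty in kvm_run
      * into the SIE block and the lazy fpu/access/guarded-storage contexts
      * before entering the guest.
      */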
3863 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3864 {
3865         struct runtime_instr_cb *riccb;
3866         struct gs_cb *gscb;
3867
3868         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3869         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3870         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3871         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3872         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3873                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3874         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3875                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3876                 /* some control register changes require a tlb flush */
3877                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3878         }
3879         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3880                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3881                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3882                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3883                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3884                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3885         }
3886         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3887                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3888                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3889                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3890                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3891                         kvm_clear_async_pf_completion_queue(vcpu);
3892         }
3893         /*
3894          * If userspace sets the riccb (e.g. after migration) to a valid state,
3895          * we should enable RI here instead of doing the lazy enablement.
3896          */
3897         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3898             test_kvm_facility(vcpu->kvm, 64) &&
3899             riccb->v &&
3900             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3901                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3902                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3903         }
3904         /*
3905          * If userspace sets the gscb (e.g. after migration) to non-zero,
3906          * we should enable GS here instead of doing the lazy enablement.
3907          */
3908         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3909             test_kvm_facility(vcpu->kvm, 133) &&
3910             gscb->gssm &&
3911             !vcpu->arch.gs_enabled) {
3912                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3913                 vcpu->arch.sie_block->ecb |= ECB_GS;
3914                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3915                 vcpu->arch.gs_enabled = 1;
3916         }
3917         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3918             test_kvm_facility(vcpu->kvm, 82)) {
3919                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3920                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3921         }
3922         save_access_regs(vcpu->arch.host_acrs);
3923         restore_access_regs(vcpu->run->s.regs.acrs);
3924         /* save host (userspace) fprs/vrs */
3925         save_fpu_regs();
3926         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3927         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3928         if (MACHINE_HAS_VX)
3929                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3930         else
3931                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3932         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3933         if (test_fp_ctl(current->thread.fpu.fpc))
3934                 /* User space provided an invalid FPC, let's clear it */
3935                 current->thread.fpu.fpc = 0;
3936         if (MACHINE_HAS_GS) {
3937                 preempt_disable();
3938                 __ctl_set_bit(2, 4);
3939                 if (current->thread.gs_cb) {
3940                         vcpu->arch.host_gscb = current->thread.gs_cb;
3941                         save_gs_cb(vcpu->arch.host_gscb);
3942                 }
3943                 if (vcpu->arch.gs_enabled) {
3944                         current->thread.gs_cb = (struct gs_cb *)
3945                                                 &vcpu->run->s.regs.gscb;
3946                         restore_gs_cb(current->thread.gs_cb);
3947                 }
3948                 preempt_enable();
3949         }
3950         /* SIE will load etoken directly from SDNX and therefore kvm_run */
3951
3952         kvm_run->kvm_dirty_regs = 0;
3953 }
3954
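     /*
      * Counterpart of sync_regs(): copy the guest register state back into
      * kvm_run and restore the host fpu, access register and guarded
      * storage state.
      */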
3955 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3956 {
3957         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3958         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3959         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3960         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3961         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3962         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3963         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3964         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3965         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3966         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3967         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3968         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3969         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3970         save_access_regs(vcpu->run->s.regs.acrs);
3971         restore_access_regs(vcpu->arch.host_acrs);
3972         /* Save guest register state */
3973         save_fpu_regs();
3974         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3975         /* Restore will be done lazily at return */
3976         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3977         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3978         if (MACHINE_HAS_GS) {
3979                 __ctl_set_bit(2, 4);
3980                 if (vcpu->arch.gs_enabled)
3981                         save_gs_cb(current->thread.gs_cb);
3982                 preempt_disable();
3983                 current->thread.gs_cb = vcpu->arch.host_gscb;
3984                 restore_gs_cb(vcpu->arch.host_gscb);
3985                 preempt_enable();
3986                 if (!vcpu->arch.host_gscb)
3987                         __ctl_clear_bit(2, 4);
3988                 vcpu->arch.host_gscb = NULL;
3989         }
3990         /* SIE will save etoken directly into SDNX and therefore kvm_run */
3991 }
3992
3993 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3994 {
3995         int rc;
3996
3997         if (kvm_run->immediate_exit)
3998                 return -EINTR;
3999
4000         if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4001             kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4002                 return -EINVAL;
4003
4004         vcpu_load(vcpu);
4005
4006         if (guestdbg_exit_pending(vcpu)) {
4007                 kvm_s390_prepare_debug_exit(vcpu);
4008                 rc = 0;
4009                 goto out;
4010         }
4011
4012         kvm_sigset_activate(vcpu);
4013
4014         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4015                 kvm_s390_vcpu_start(vcpu);
4016         } else if (is_vcpu_stopped(vcpu)) {
4017                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4018                                    vcpu->vcpu_id);
4019                 rc = -EINVAL;
4020                 goto out;
4021         }
4022
4023         sync_regs(vcpu, kvm_run);
4024         enable_cpu_timer_accounting(vcpu);
4025
4026         might_fault();
4027         rc = __vcpu_run(vcpu);
4028
4029         if (signal_pending(current) && !rc) {
4030                 kvm_run->exit_reason = KVM_EXIT_INTR;
4031                 rc = -EINTR;
4032         }
4033
4034         if (guestdbg_exit_pending(vcpu) && !rc)  {
4035                 kvm_s390_prepare_debug_exit(vcpu);
4036                 rc = 0;
4037         }
4038
4039         if (rc == -EREMOTE) {
4040                 /* userspace support is needed, kvm_run has been prepared */
4041                 rc = 0;
4042         }
4043
4044         disable_cpu_timer_accounting(vcpu);
4045         store_regs(vcpu, kvm_run);
4046
4047         kvm_sigset_deactivate(vcpu);
4048
4049         vcpu->stat.exit_userspace++;
4050 out:
4051         vcpu_put(vcpu);
4052         return rc;
4053 }
4054
4055 /*
4056  * store status at address
4057  * we have two special cases:
4058  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4059  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4060  */
4061 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4062 {
4063         unsigned char archmode = 1;
4064         freg_t fprs[NUM_FPRS];
4065         unsigned int px;
4066         u64 clkcomp, cputm;
4067         int rc;
4068
4069         px = kvm_s390_get_prefix(vcpu);
4070         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4071                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4072                         return -EFAULT;
4073                 gpa = 0;
4074         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4075                 if (write_guest_real(vcpu, 163, &archmode, 1))
4076                         return -EFAULT;
4077                 gpa = px;
4078         } else
4079                 gpa -= __LC_FPREGS_SAVE_AREA;
4080
4081         /* manually convert vector registers if necessary */
4082         if (MACHINE_HAS_VX) {
4083                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4084                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4085                                      fprs, 128);
4086         } else {
4087                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4088                                      vcpu->run->s.regs.fprs, 128);
4089         }
4090         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4091                               vcpu->run->s.regs.gprs, 128);
4092         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4093                               &vcpu->arch.sie_block->gpsw, 16);
4094         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4095                               &px, 4);
4096         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4097                               &vcpu->run->s.regs.fpc, 4);
4098         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4099                               &vcpu->arch.sie_block->todpr, 4);
4100         cputm = kvm_s390_get_cpu_timer(vcpu);
4101         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4102                               &cputm, 8);
4103         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4104         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4105                               &clkcomp, 8);
4106         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4107                               &vcpu->run->s.regs.acrs, 64);
4108         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4109                               &vcpu->arch.sie_block->gcr, 128);
4110         return rc ? -EFAULT : 0;
4111 }
4112
4113 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4114 {
4115         /*
4116          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4117          * switch in the run ioctl. Let's update our copies before we save
4118          * them into the save area.
4119          */
4120         save_fpu_regs();
4121         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4122         save_access_regs(vcpu->run->s.regs.acrs);
4123
4124         return kvm_s390_store_status_unloaded(vcpu, addr);
4125 }
4126
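/*
 * Illustrative sketch (not part of the original file): KVM_S390_STORE_STATUS
 * passes the target address directly as the ioctl argument (see the dispatch
 * in kvm_arch_vcpu_ioctl() below), with the two magic values documented above
 * selecting the special cases. "vcpu_fd" is an assumption; the -1ul/-2ul
 * values mirror the kernel's KVM_S390_STORE_STATUS_NOADDR/_PREFIXED defines.
 *
 *        #include <linux/kvm.h>
 *        #include <sys/ioctl.h>
 *
 *        // store to the 64-bit save area at absolute 0x1200 (NOADDR case)
 *        ioctl(vcpu_fd, KVM_S390_STORE_STATUS, -1ul);
 *        // store into the guest's prefix area (PREFIXED case)
 *        ioctl(vcpu_fd, KVM_S390_STORE_STATUS, -2ul);
 *        // or store relative to an explicit address, interpreted against the
 *        // save-area layout used by kvm_s390_store_status_unloaded() above
 *        ioctl(vcpu_fd, KVM_S390_STORE_STATUS, 0x8000ul);
 */
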
4127 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4128 {
4129         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4130         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4131 }
4132
4133 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4134 {
4135         unsigned int i;
4136         struct kvm_vcpu *vcpu;
4137
4138         kvm_for_each_vcpu(i, vcpu, kvm) {
4139                 __disable_ibs_on_vcpu(vcpu);
4140         }
4141 }
4142
4143 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4144 {
4145         if (!sclp.has_ibs)
4146                 return;
4147         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4148         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4149 }
4150
4151 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4152 {
4153         int i, online_vcpus, started_vcpus = 0;
4154
4155         if (!is_vcpu_stopped(vcpu))
4156                 return;
4157
4158         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4159         /* Only one cpu at a time may enter/leave the STOPPED state. */
4160         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4161         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4162
4163         for (i = 0; i < online_vcpus; i++) {
4164                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4165                         started_vcpus++;
4166         }
4167
4168         if (started_vcpus == 0) {
4169                 /* we're the only active VCPU -> speed it up */
4170                 __enable_ibs_on_vcpu(vcpu);
4171         } else if (started_vcpus == 1) {
4172                 /*
4173                  * As we are starting a second VCPU, we have to disable
4174                  * the IBS facility on all VCPUs to remove potentially
4175                  * outstanding ENABLE requests.
4176                  */
4177                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4178         }
4179
4180         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4181         /*
4182          * Another VCPU might have used IBS while we were offline.
4183          * Let's play it safe and flush the VCPU at startup.
4184          */
4185         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4186         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4187         return;
4188 }
4189
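/*
 * Note (not in the original file): the IBS toggling above and in
 * kvm_s390_vcpu_stop() below follows one rule - the facility only pays off
 * while a single VCPU is running. It is therefore enabled when the starting
 * VCPU is the sole runner, dropped on all VCPUs as soon as a second one
 * starts, and re-enabled for the last remaining runner when stopping a VCPU
 * leaves exactly one behind.
 */
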
4190 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4191 {
4192         int i, online_vcpus, started_vcpus = 0;
4193         struct kvm_vcpu *started_vcpu = NULL;
4194
4195         if (is_vcpu_stopped(vcpu))
4196                 return;
4197
4198         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4199         /* Only one cpu at a time may enter/leave the STOPPED state. */
4200         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4201         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4202
4203         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4204         kvm_s390_clear_stop_irq(vcpu);
4205
4206         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4207         __disable_ibs_on_vcpu(vcpu);
4208
4209         for (i = 0; i < online_vcpus; i++) {
4210                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4211                         started_vcpus++;
4212                         started_vcpu = vcpu->kvm->vcpus[i];
4213                 }
4214         }
4215
4216         if (started_vcpus == 1) {
4217                 /*
4218                  * As we only have one VCPU left, we want to enable the
4219                  * IBS facility for that VCPU to speed it up.
4220                  */
4221                 __enable_ibs_on_vcpu(started_vcpu);
4222         }
4223
4224         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4225         return;
4226 }
4227
4228 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4229                                      struct kvm_enable_cap *cap)
4230 {
4231         int r;
4232
4233         if (cap->flags)
4234                 return -EINVAL;
4235
4236         switch (cap->cap) {
4237         case KVM_CAP_S390_CSS_SUPPORT:
4238                 if (!vcpu->kvm->arch.css_support) {
4239                         vcpu->kvm->arch.css_support = 1;
4240                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4241                         trace_kvm_s390_enable_css(vcpu->kvm);
4242                 }
4243                 r = 0;
4244                 break;
4245         default:
4246                 r = -EINVAL;
4247                 break;
4248         }
4249         return r;
4250 }
4251
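/*
 * Illustrative sketch (not part of the original file): enabling the only
 * per-vcpu capability handled above from userspace. "vcpu_fd" is an
 * assumption; cap/flags/args follow the generic struct kvm_enable_cap UAPI
 * and flags has to stay zero, as checked above.
 *
 *        #include <linux/kvm.h>
 *        #include <stdio.h>
 *        #include <string.h>
 *        #include <sys/ioctl.h>
 *
 *        struct kvm_enable_cap cap;
 *
 *        memset(&cap, 0, sizeof(cap));
 *        cap.cap = KVM_CAP_S390_CSS_SUPPORT;
 *        if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap))
 *                perror("KVM_ENABLE_CAP");
 */
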
4252 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4253                                   struct kvm_s390_mem_op *mop)
4254 {
4255         void __user *uaddr = (void __user *)mop->buf;
4256         void *tmpbuf = NULL;
4257         int r, srcu_idx;
4258         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4259                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4260
4261         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4262                 return -EINVAL;
4263
4264         if (mop->size > MEM_OP_MAX_SIZE)
4265                 return -E2BIG;
4266
4267         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4268                 tmpbuf = vmalloc(mop->size);
4269                 if (!tmpbuf)
4270                         return -ENOMEM;
4271         }
4272
4273         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4274
4275         switch (mop->op) {
4276         case KVM_S390_MEMOP_LOGICAL_READ:
4277                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4278                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4279                                             mop->size, GACC_FETCH);
4280                         break;
4281                 }
4282                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4283                 if (r == 0) {
4284                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4285                                 r = -EFAULT;
4286                 }
4287                 break;
4288         case KVM_S390_MEMOP_LOGICAL_WRITE:
4289                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4290                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4291                                             mop->size, GACC_STORE);
4292                         break;
4293                 }
4294                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4295                         r = -EFAULT;
4296                         break;
4297                 }
4298                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4299                 break;
4300         default:
4301                 r = -EINVAL;
4302         }
4303
4304         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4305
4306         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4307                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4308
4309         vfree(tmpbuf);
4310         return r;
4311 }
4312
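/*
 * Illustrative sketch (not part of the original file): a logical read through
 * KVM_S390_MEM_OP as handled above, first probing the range with
 * KVM_S390_MEMOP_F_CHECK_ONLY and then copying it out. "vcpu_fd" and the
 * guest address are assumptions; struct and flag names are the s390 memop
 * UAPI used in this handler.
 *
 *        #include <linux/kvm.h>
 *        #include <string.h>
 *        #include <sys/ioctl.h>
 *
 *        unsigned char buf[256];
 *        struct kvm_s390_mem_op op;
 *
 *        memset(&op, 0, sizeof(op));
 *        op.gaddr = 0x10000;                       // guest logical address
 *        op.size  = sizeof(buf);
 *        op.op    = KVM_S390_MEMOP_LOGICAL_READ;
 *        op.ar    = 0;                             // access register 0
 *        op.buf   = (unsigned long)buf;
 *
 *        op.flags = KVM_S390_MEMOP_F_CHECK_ONLY;   // translate only, no copy
 *        if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) == 0) {
 *                op.flags = 0;
 *                ioctl(vcpu_fd, KVM_S390_MEM_OP, &op); // actually read into buf
 *        }
 */
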
4313 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4314                                unsigned int ioctl, unsigned long arg)
4315 {
4316         struct kvm_vcpu *vcpu = filp->private_data;
4317         void __user *argp = (void __user *)arg;
4318
4319         switch (ioctl) {
4320         case KVM_S390_IRQ: {
4321                 struct kvm_s390_irq s390irq;
4322
4323                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4324                         return -EFAULT;
4325                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4326         }
4327         case KVM_S390_INTERRUPT: {
4328                 struct kvm_s390_interrupt s390int;
4329                 struct kvm_s390_irq s390irq = {};
4330
4331                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4332                         return -EFAULT;
4333                 if (s390int_to_s390irq(&s390int, &s390irq))
4334                         return -EINVAL;
4335                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4336         }
4337         }
4338         return -ENOIOCTLCMD;
4339 }
4340
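/*
 * Illustrative sketch (not part of the original file): injecting an interrupt
 * through the asynchronous KVM_S390_IRQ path above. KVM_S390_RESTART carries
 * no payload, which keeps the example small; "vcpu_fd" is an assumption.
 *
 *        #include <linux/kvm.h>
 *        #include <stdio.h>
 *        #include <string.h>
 *        #include <sys/ioctl.h>
 *
 *        struct kvm_s390_irq irq;
 *
 *        memset(&irq, 0, sizeof(irq));
 *        irq.type = KVM_S390_RESTART;
 *        if (ioctl(vcpu_fd, KVM_S390_IRQ, &irq))
 *                perror("KVM_S390_IRQ");
 */
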
4341 long kvm_arch_vcpu_ioctl(struct file *filp,
4342                          unsigned int ioctl, unsigned long arg)
4343 {
4344         struct kvm_vcpu *vcpu = filp->private_data;
4345         void __user *argp = (void __user *)arg;
4346         int idx;
4347         long r;
4348
4349         vcpu_load(vcpu);
4350
4351         switch (ioctl) {
4352         case KVM_S390_STORE_STATUS:
4353                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4354                 r = kvm_s390_vcpu_store_status(vcpu, arg);
4355                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4356                 break;
4357         case KVM_S390_SET_INITIAL_PSW: {
4358                 psw_t psw;
4359
4360                 r = -EFAULT;
4361                 if (copy_from_user(&psw, argp, sizeof(psw)))
4362                         break;
4363                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4364                 break;
4365         }
4366         case KVM_S390_INITIAL_RESET:
4367                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4368                 break;
4369         case KVM_SET_ONE_REG:
4370         case KVM_GET_ONE_REG: {
4371                 struct kvm_one_reg reg;
4372                 r = -EFAULT;
4373                 if (copy_from_user(&reg, argp, sizeof(reg)))
4374                         break;
4375                 if (ioctl == KVM_SET_ONE_REG)
4376                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4377                 else
4378                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4379                 break;
4380         }
4381 #ifdef CONFIG_KVM_S390_UCONTROL
4382         case KVM_S390_UCAS_MAP: {
4383                 struct kvm_s390_ucas_mapping ucasmap;
4384
4385                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4386                         r = -EFAULT;
4387                         break;
4388                 }
4389
4390                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4391                         r = -EINVAL;
4392                         break;
4393                 }
4394
4395                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4396                                      ucasmap.vcpu_addr, ucasmap.length);
4397                 break;
4398         }
4399         case KVM_S390_UCAS_UNMAP: {
4400                 struct kvm_s390_ucas_mapping ucasmap;
4401
4402                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4403                         r = -EFAULT;
4404                         break;
4405                 }
4406
4407                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4408                         r = -EINVAL;
4409                         break;
4410                 }
4411
4412                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4413                         ucasmap.length);
4414                 break;
4415         }
4416 #endif
4417         case KVM_S390_VCPU_FAULT: {
4418                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4419                 break;
4420         }
4421         case KVM_ENABLE_CAP:
4422         {
4423                 struct kvm_enable_cap cap;
4424                 r = -EFAULT;
4425                 if (copy_from_user(&cap, argp, sizeof(cap)))
4426                         break;
4427                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4428                 break;
4429         }
4430         case KVM_S390_MEM_OP: {
4431                 struct kvm_s390_mem_op mem_op;
4432
4433                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4434                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4435                 else
4436                         r = -EFAULT;
4437                 break;
4438         }
4439         case KVM_S390_SET_IRQ_STATE: {
4440                 struct kvm_s390_irq_state irq_state;
4441
4442                 r = -EFAULT;
4443                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4444                         break;
4445                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4446                     irq_state.len == 0 ||
4447                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4448                         r = -EINVAL;
4449                         break;
4450                 }
4451                 /* do not use irq_state.flags; it would break old QEMUs */
4452                 r = kvm_s390_set_irq_state(vcpu,
4453                                            (void __user *) irq_state.buf,
4454                                            irq_state.len);
4455                 break;
4456         }
4457         case KVM_S390_GET_IRQ_STATE: {
4458                 struct kvm_s390_irq_state irq_state;
4459
4460                 r = -EFAULT;
4461                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4462                         break;
4463                 if (irq_state.len == 0) {
4464                         r = -EINVAL;
4465                         break;
4466                 }
4467                 /* do not use irq_state.flags; it would break old QEMUs */
4468                 r = kvm_s390_get_irq_state(vcpu,
4469                                            (__u8 __user *)  irq_state.buf,
4470                                            irq_state.len);
4471                 break;
4472         }
4473         default:
4474                 r = -ENOTTY;
4475         }
4476
4477         vcpu_put(vcpu);
4478         return r;
4479 }
4480
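/*
 * Illustrative sketch (not part of the original file): saving the pending
 * interrupt state with KVM_S390_GET_IRQ_STATE as dispatched above, e.g. for
 * migration. The buffer size is a generous guess and the call fails if the
 * buffer cannot hold all pending interrupts; "vcpu_fd" is an assumption, and
 * flags stays zero for compatibility as noted in the handler.
 *
 *        #include <linux/kvm.h>
 *        #include <string.h>
 *        #include <sys/ioctl.h>
 *
 *        struct kvm_s390_irq irqs[512];
 *        struct kvm_s390_irq_state irq_state;
 *        int ret;
 *
 *        memset(&irq_state, 0, sizeof(irq_state));
 *        irq_state.buf = (unsigned long)irqs;
 *        irq_state.len = sizeof(irqs);
 *        ret = ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &irq_state);
 *        // ret < 0 on error, otherwise it reflects how much state was copied
 */
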
4481 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4482 {
4483 #ifdef CONFIG_KVM_S390_UCONTROL
4484         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4485                  && (kvm_is_ucontrol(vcpu->kvm))) {
4486                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4487                 get_page(vmf->page);
4488                 return 0;
4489         }
4490 #endif
4491         return VM_FAULT_SIGBUS;
4492 }
4493
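/*
 * Illustrative sketch (not part of the original file): for user-controlled
 * VMs (CONFIG_KVM_S390_UCONTROL), the fault handler above lets userspace map
 * the SIE control block of a VCPU by mmap()ing the vcpu fd at the page offset
 * KVM_S390_SIE_PAGE_OFFSET. "vcpu_fd" is an assumption.
 *
 *        #include <linux/kvm.h>
 *        #include <sys/mman.h>
 *        #include <unistd.h>
 *
 *        long page_size = sysconf(_SC_PAGESIZE);
 *        void *sie_block = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
 *                               MAP_SHARED, vcpu_fd,
 *                               KVM_S390_SIE_PAGE_OFFSET * page_size);
 */
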
4494 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4495                             unsigned long npages)
4496 {
4497         return 0;
4498 }
4499
4500 /* Section: memory related */
4501 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4502                                    struct kvm_memory_slot *memslot,
4503                                    const struct kvm_userspace_memory_region *mem,
4504                                    enum kvm_mr_change change)
4505 {
4506         /* A few sanity checks. Memory slots have to start and end on a
4507            segment boundary (1MB). The backing memory in userland may be
4508            fragmented into various different vmas, and it is fine to mmap()
4509            and munmap() ranges within this slot at any time after this call. */
4510
4511         if (mem->userspace_addr & 0xffffful)
4512                 return -EINVAL;
4513
4514         if (mem->memory_size & 0xffffful)
4515                 return -EINVAL;
4516
4517         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4518                 return -EINVAL;
4519
4520         return 0;
4521 }
4522
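/*
 * Illustrative sketch (not part of the original file): a slot registration
 * that passes the checks above - userspace address and size are multiples of
 * 1 MB and the slot stays below the configured guest memory limit. "vm_fd"
 * and the sizes are assumptions; the ioctl and struct are the generic KVM
 * UAPI.
 *
 *        #include <linux/kvm.h>
 *        #include <stdlib.h>
 *        #include <sys/ioctl.h>
 *
 *        size_t ram_size = 256UL << 20;          // a multiple of 1 MB
 *        void *ram;
 *
 *        // both the userspace address and the size must be 1 MB aligned
 *        posix_memalign(&ram, 1UL << 20, ram_size);
 *
 *        struct kvm_userspace_memory_region region = {
 *                .slot            = 0,
 *                .guest_phys_addr = 0,
 *                .memory_size     = ram_size,
 *                .userspace_addr  = (unsigned long)ram,
 *        };
 *        ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 */
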
4523 void kvm_arch_commit_memory_region(struct kvm *kvm,
4524                                 const struct kvm_userspace_memory_region *mem,
4525                                 const struct kvm_memory_slot *old,
4526                                 const struct kvm_memory_slot *new,
4527                                 enum kvm_mr_change change)
4528 {
4529         int rc = 0;
4530
4531         switch (change) {
4532         case KVM_MR_DELETE:
4533                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4534                                         old->npages * PAGE_SIZE);
4535                 break;
4536         case KVM_MR_MOVE:
4537                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4538                                         old->npages * PAGE_SIZE);
4539                 if (rc)
4540                         break;
4541                 /* FALLTHROUGH */
4542         case KVM_MR_CREATE:
4543                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4544                                       mem->guest_phys_addr, mem->memory_size);
4545                 break;
4546         case KVM_MR_FLAGS_ONLY:
4547                 break;
4548         default:
4549                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
4550         }
4551         if (rc)
4552                 pr_warn("failed to commit memory region\n");
4553         return;
4554 }
4555
4556 static inline unsigned long nonhyp_mask(int i)
4557 {
4558         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4559
4560         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4561 }
4562
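/*
 * Worked example for nonhyp_mask() above (descriptive note, not in the
 * original file): sclp.hmfai packs a 2-bit code per facility doubleword i and
 * (hmfai << i * 2) >> 30 isolates the code for doubleword i. The returned
 * mask starts out with the low 48 bits set and is shifted right by 16 bits
 * per code step, so code 0 passes facility bits 16-63 of that doubleword
 * through to kvm_s390_fac_base, code 1 passes bits 32-63, code 2 passes
 * bits 48-63 and code 3 passes none.
 */
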
4563 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4564 {
4565         vcpu->valid_wakeup = false;
4566 }
4567
4568 static int __init kvm_s390_init(void)
4569 {
4570         int i;
4571
4572         if (!sclp.has_sief2) {
4573                 pr_info("SIE is not available\n");
4574                 return -ENODEV;
4575         }
4576
4577         if (nested && hpage) {
4578                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
4579                 return -EINVAL;
4580         }
4581
4582         for (i = 0; i < 16; i++)
4583                 kvm_s390_fac_base[i] |=
4584                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4585
4586         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4587 }
4588
4589 static void __exit kvm_s390_exit(void)
4590 {
4591         kvm_exit();
4592 }
4593
4594 module_init(kvm_s390_init);
4595 module_exit(kvm_s390_exit);
4596
4597 /*
4598  * Enable autoloading of the kvm module.
4599  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4600  * since x86 takes a different approach.
4601  */
4602 #include <linux/miscdevice.h>
4603 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4604 MODULE_ALIAS("devname:kvm");