virt/kvm/arm/vgic/vgic.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Copyright (C) 2015, 2016 ARM Ltd.
   4  */
   5
   6 #include <linux/interrupt.h>
   7 #include <linux/irq.h>
   8 #include <linux/kvm.h>
   9 #include <linux/kvm_host.h>
  10 #include <linux/list_sort.h>
  11 #include <linux/nospec.h>
  12
  13 #include <asm/kvm_hyp.h>
  14
  15 #include "vgic.h"
  16
  17 #define CREATE_TRACE_POINTS
  18 #include "trace.h"
  19
/*
 * Global VGIC state consulted throughout this file (e.g. .type, .nr_lr and
 * the gicv3_cpuif static key). The static key starts out false; the object
 * is placed in __ro_after_init memory.
 */
struct vgic_global kvm_vgic_global_state __ro_after_init = {
        .gicv3_cpuif = STATIC_KEY_FALSE_INIT,
};
  23
  24 /*
  25  * Locking order is always:
  26  * kvm->lock (mutex)
  27  *   its->cmd_lock (mutex)
  28  *     its->its_lock (mutex)
  29  *       vgic_cpu->ap_list_lock         must be taken with IRQs disabled
  30  *         kvm->lpi_list_lock           must be taken with IRQs disabled
  31  *           vgic_irq->irq_lock         must be taken with IRQs disabled
  32  *
  33  * As the ap_list_lock might be taken from the timer interrupt handler,
  34  * we have to disable IRQs before taking this lock and everything lower
  35  * than it.
  36  *
  37  * If you need to take multiple locks, always take the upper lock first,
  38  * then the lower ones, e.g. first take the its_lock, then the irq_lock.
  39  * If you are already holding a lock and need to take a higher one, you
   40  * have to drop the lower ranking lock first and re-acquire it after having
  41  * taken the upper one.
  42  *
  43  * When taking more than one ap_list_lock at the same time, always take the
  44  * lowest numbered VCPU's ap_list_lock first, so:
  45  *   vcpuX->vcpu_id < vcpuY->vcpu_id:
  46  *     raw_spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
  47  *     raw_spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
  48  *
  49  * Since the VGIC must support injecting virtual interrupts from ISRs, we have
  50  * to use the raw_spin_lock_irqsave/raw_spin_unlock_irqrestore versions of outer
  51  * spinlocks for any lock that may be taken while injecting an interrupt.
  52  */
  53
  54 /*
  55  * Iterate over the VM's list of mapped LPIs to find the one with a
  56  * matching interrupt ID and return a reference to the IRQ structure.
  57  */
  58 static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
  59 {
  60         struct vgic_dist *dist = &kvm->arch.vgic;
  61         struct vgic_irq *irq = NULL;
  62         unsigned long flags;
  63
  64         raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
  65
  66         list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
  67                 if (irq->intid != intid)
  68                         continue;
  69
  70                 /*
  71                  * This increases the refcount, the caller is expected to
  72                  * call vgic_put_irq() later once it's finished with the IRQ.
  73                  */
  74                 vgic_get_irq_kref(irq);
  75                 goto out_unlock;
  76         }
  77         irq = NULL;
  78
  79 out_unlock:
  80         raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
  81
  82         return irq;
  83 }
  84
  85 /*
  86  * This looks up the virtual interrupt ID to get the corresponding
  87  * struct vgic_irq. It also increases the refcount, so any caller is expected
  88  * to call vgic_put_irq() once it's finished with this IRQ.
  89  */
  90 struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
  91                               u32 intid)
  92 {
  93         /* SGIs and PPIs */
  94         if (intid <= VGIC_MAX_PRIVATE) {
  95                 intid = array_index_nospec(intid, VGIC_MAX_PRIVATE + 1);
  96                 return &vcpu->arch.vgic_cpu.private_irqs[intid];
  97         }
  98
  99         /* SPIs */
 100         if (intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) {
 101                 intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS);
 102                 return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
 103         }
 104
 105         /* LPIs */
 106         if (intid >= VGIC_MIN_LPI)
 107                 return vgic_get_lpi(kvm, intid);
 108
 109         WARN(1, "Looking up struct vgic_irq for reserved INTID");
 110         return NULL;
 111 }
 112
/*
 * kref release callback passed to kref_put() by vgic_put_irq().
 *
 * We can't do anything in here, because we lack the kvm pointer to
 * lock and remove the item from the lpi_list. So we keep this function
 * empty and use the return value of kref_put() to trigger the freeing.
 */
static void vgic_irq_release(struct kref *ref)
{
}
 121
 122 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
 123 {
 124         struct vgic_dist *dist = &kvm->arch.vgic;
 125         unsigned long flags;
 126
 127         if (irq->intid < VGIC_MIN_LPI)
 128                 return;
 129
 130         raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
 131         if (!kref_put(&irq->refcount, vgic_irq_release)) {
 132                 raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
 133                 return;
 134         };
 135
 136         list_del(&irq->lpi_list);
 137         dist->lpi_list_count--;
 138         raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
 139
 140         kfree(irq);
 141 }
 142
 143 void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
 144 {
 145         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 146         struct vgic_irq *irq, *tmp;
 147         unsigned long flags;
 148
 149         raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
 150
 151         list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
 152                 if (irq->intid >= VGIC_MIN_LPI) {
 153                         raw_spin_lock(&irq->irq_lock);
 154                         list_del(&irq->ap_list);
 155                         irq->vcpu = NULL;
 156                         raw_spin_unlock(&irq->irq_lock);
 157                         vgic_put_irq(vcpu->kvm, irq);
 158                 }
 159         }
 160
 161         raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
 162 }
 163
 164 void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
 165 {
 166         WARN_ON(irq_set_irqchip_state(irq->host_irq,
 167                                       IRQCHIP_STATE_PENDING,
 168                                       pending));
 169 }
 170
 171 bool vgic_get_phys_line_level(struct vgic_irq *irq)
 172 {
 173         bool line_level;
 174
 175         BUG_ON(!irq->hw);
 176
 177         if (irq->get_input_level)
 178                 return irq->get_input_level(irq->intid);
 179
 180         WARN_ON(irq_get_irqchip_state(irq->host_irq,
 181                                       IRQCHIP_STATE_PENDING,
 182                                       &line_level));
 183         return line_level;
 184 }
 185
 186 /* Set/Clear the physical active state */
 187 void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
 188 {
 189
 190         BUG_ON(!irq->hw);
 191         WARN_ON(irq_set_irqchip_state(irq->host_irq,
 192                                       IRQCHIP_STATE_ACTIVE,
 193                                       active));
 194 }
 195
 196 /**
 197  * kvm_vgic_target_oracle - compute the target vcpu for an irq
 198  *
 199  * @irq:        The irq to route. Must be already locked.
 200  *
 201  * Based on the current state of the interrupt (enabled, pending,
 202  * active, vcpu and target_vcpu), compute the next vcpu this should be
 203  * given to. Return NULL if this shouldn't be injected at all.
 204  *
 205  * Requires the IRQ lock to be held.
 206  */
 207 static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
 208 {
 209         lockdep_assert_held(&irq->irq_lock);
 210
 211         /* If the interrupt is active, it must stay on the current vcpu */
 212         if (irq->active)
 213                 return irq->vcpu ? : irq->target_vcpu;
 214
 215         /*
 216          * If the IRQ is not active but enabled and pending, we should direct
 217          * it to its configured target VCPU.
 218          * If the distributor is disabled, pending interrupts shouldn't be
 219          * forwarded.
 220          */
 221         if (irq->enabled && irq_is_pending(irq)) {
 222                 if (unlikely(irq->target_vcpu &&
 223                              !irq->target_vcpu->kvm->arch.vgic.enabled))
 224                         return NULL;
 225
 226                 return irq->target_vcpu;
 227         }
 228
 229         /* If neither active nor pending and enabled, then this IRQ should not
 230          * be queued to any VCPU.
 231          */
 232         return NULL;
 233 }
 234
 235 /*
 236  * The order of items in the ap_lists defines how we'll pack things in LRs as
 237  * well, the first items in the list being the first things populated in the
 238  * LRs.
 239  *
 240  * A hard rule is that active interrupts can never be pushed out of the LRs
 241  * (and therefore take priority) since we cannot reliably trap on deactivation
 242  * of IRQs and therefore they have to be present in the LRs.
 243  *
 244  * Otherwise things should be sorted by the priority field and the GIC
 245  * hardware support will take care of preemption of priority groups etc.
 246  *
 247  * Return negative if "a" sorts before "b", 0 to preserve order, and positive
 248  * to sort "b" before "a".
 249  */
 250 static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b)
 251 {
 252         struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
 253         struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
 254         bool penda, pendb;
 255         int ret;
 256
 257         raw_spin_lock(&irqa->irq_lock);
 258         raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
 259
 260         if (irqa->active || irqb->active) {
 261                 ret = (int)irqb->active - (int)irqa->active;
 262                 goto out;
 263         }
 264
 265         penda = irqa->enabled && irq_is_pending(irqa);
 266         pendb = irqb->enabled && irq_is_pending(irqb);
 267
 268         if (!penda || !pendb) {
 269                 ret = (int)pendb - (int)penda;
 270                 goto out;
 271         }
 272
 273         /* Both pending and enabled, sort by priority */
 274         ret = irqa->priority - irqb->priority;
 275 out:
 276         raw_spin_unlock(&irqb->irq_lock);
 277         raw_spin_unlock(&irqa->irq_lock);
 278         return ret;
 279 }
 280
 281 /* Must be called with the ap_list_lock held */
 282 static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
 283 {
 284         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 285
 286         lockdep_assert_held(&vgic_cpu->ap_list_lock);
 287
 288         list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
 289 }
 290
 291 /*
 292  * Only valid injection if changing level for level-triggered IRQs or for a
 293  * rising edge, and in-kernel connected IRQ lines can only be controlled by
 294  * their owner.
 295  */
 296 static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner)
 297 {
 298         if (irq->owner != owner)
 299                 return false;
 300
 301         switch (irq->config) {
 302         case VGIC_CONFIG_LEVEL:
 303                 return irq->line_level != level;
 304         case VGIC_CONFIG_EDGE:
 305                 return level;
 306         }
 307
 308         return false;
 309 }
 310
/*
 * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
 * Do the queuing if necessary, taking the right locks in the right order.
 * Returns true when the IRQ was queued, false otherwise.
 *
 * @kvm:   The VM the IRQ belongs to (not used by the body itself)
 * @irq:   The IRQ to queue; its irq_lock must be held on entry
 * @flags: The IRQ flags saved by the caller when it took irq->irq_lock;
 *         they are restored when the last lock is dropped
 *
 * Needs to be entered with the IRQ lock already held, but will return
 * with all locks dropped.
 */
bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
                           unsigned long flags)
{
        struct kvm_vcpu *vcpu;

        lockdep_assert_held(&irq->irq_lock);

retry:
        /* Work out where the IRQ should go given its current state. */
        vcpu = vgic_target_oracle(irq);
        if (irq->vcpu || !vcpu) {
                /*
                 * If this IRQ is already on a VCPU's ap_list, then it
                 * cannot be moved or modified and there is no more work for
                 * us to do.
                 *
                 * Otherwise, if the irq is not pending and enabled, it does
                 * not need to be inserted into an ap_list and there is also
                 * no more work for us to do.
                 */
                raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

                /*
                 * We have to kick the VCPU here, because we could be
                 * queueing an edge-triggered interrupt for which we
                 * get no EOI maintenance interrupt. In that case,
                 * while the IRQ is already on the VCPU's AP list, the
                 * VCPU could have EOI'ed the original interrupt and
                 * won't see this one until it exits for some other
                 * reason.
                 */
                if (vcpu) {
                        kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
                        kvm_vcpu_kick(vcpu);
                }
                return false;
        }

        /*
         * We must unlock the irq lock to take the ap_list_lock where
         * we are going to insert this new pending interrupt.
         */
        raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

        /* someone can do stuff here, which we re-check below */

        /* Lock order: the ap_list_lock ranks above the irq_lock. */
        raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
        raw_spin_lock(&irq->irq_lock);

        /*
         * Did something change behind our backs?
         *
         * There are two cases:
         * 1) The irq lost its pending state or was disabled behind our
         *    backs and/or it was queued to another VCPU's ap_list.
         * 2) Someone changed the affinity on this irq behind our
         *    backs and we are now holding the wrong ap_list_lock.
         *
         * In both cases, drop the locks and retry.
         */

        if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
                raw_spin_unlock(&irq->irq_lock);
                raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock,
                                           flags);

                /* Back to the entry condition: only the irq_lock held. */
                raw_spin_lock_irqsave(&irq->irq_lock, flags);
                goto retry;
        }

        /*
         * Grab a reference to the irq to reflect the fact that it is
         * now in the ap_list.
         */
        vgic_get_irq_kref(irq);
        list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
        irq->vcpu = vcpu;

        raw_spin_unlock(&irq->irq_lock);
        raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);

        /* Let the target VCPU know there is something new to look at. */
        kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
        kvm_vcpu_kick(vcpu);

        return true;
}
 404
 405 /**
 406  * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
 407  * @kvm:     The VM structure pointer
 408  * @cpuid:   The CPU for PPIs
 409  * @intid:   The INTID to inject a new state to.
 410  * @level:   Edge-triggered:  true:  to trigger the interrupt
 411  *                            false: to ignore the call
 412  *           Level-sensitive  true:  raise the input signal
 413  *                            false: lower the input signal
 414  * @owner:   The opaque pointer to the owner of the IRQ being raised to verify
 415  *           that the caller is allowed to inject this IRQ.  Userspace
 416  *           injections will have owner == NULL.
 417  *
 418  * The VGIC is not concerned with devices being active-LOW or active-HIGH for
 419  * level-sensitive interrupts.  You can think of the level parameter as 1
 420  * being HIGH and 0 being LOW and all devices being active-HIGH.
 421  */
 422 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
 423                         bool level, void *owner)
 424 {
 425         struct kvm_vcpu *vcpu;
 426         struct vgic_irq *irq;
 427         unsigned long flags;
 428         int ret;
 429
 430         trace_vgic_update_irq_pending(cpuid, intid, level);
 431
 432         ret = vgic_lazy_init(kvm);
 433         if (ret)
 434                 return ret;
 435
 436         vcpu = kvm_get_vcpu(kvm, cpuid);
 437         if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
 438                 return -EINVAL;
 439
 440         irq = vgic_get_irq(kvm, vcpu, intid);
 441         if (!irq)
 442                 return -EINVAL;
 443
 444         raw_spin_lock_irqsave(&irq->irq_lock, flags);
 445
 446         if (!vgic_validate_injection(irq, level, owner)) {
 447                 /* Nothing to see here, move along... */
 448                 raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 449                 vgic_put_irq(kvm, irq);
 450                 return 0;
 451         }
 452
 453         if (irq->config == VGIC_CONFIG_LEVEL)
 454                 irq->line_level = level;
 455         else
 456                 irq->pending_latch = true;
 457
 458         vgic_queue_irq_unlock(kvm, irq, flags);
 459         vgic_put_irq(kvm, irq);
 460
 461         return 0;
 462 }
 463
 464 /* @irq->irq_lock must be held */
 465 static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
 466                             unsigned int host_irq,
 467                             bool (*get_input_level)(int vindid))
 468 {
 469         struct irq_desc *desc;
 470         struct irq_data *data;
 471
 472         /*
 473          * Find the physical IRQ number corresponding to @host_irq
 474          */
 475         desc = irq_to_desc(host_irq);
 476         if (!desc) {
 477                 kvm_err("%s: no interrupt descriptor\n", __func__);
 478                 return -EINVAL;
 479         }
 480         data = irq_desc_get_irq_data(desc);
 481         while (data->parent_data)
 482                 data = data->parent_data;
 483
 484         irq->hw = true;
 485         irq->host_irq = host_irq;
 486         irq->hwintid = data->hwirq;
 487         irq->get_input_level = get_input_level;
 488         return 0;
 489 }
 490
 491 /* @irq->irq_lock must be held */
 492 static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
 493 {
 494         irq->hw = false;
 495         irq->hwintid = 0;
 496         irq->get_input_level = NULL;
 497 }
 498
 499 int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
 500                           u32 vintid, bool (*get_input_level)(int vindid))
 501 {
 502         struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 503         unsigned long flags;
 504         int ret;
 505
 506         BUG_ON(!irq);
 507
 508         raw_spin_lock_irqsave(&irq->irq_lock, flags);
 509         ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level);
 510         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 511         vgic_put_irq(vcpu->kvm, irq);
 512
 513         return ret;
 514 }
 515
 516 /**
 517  * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ
 518  * @vcpu: The VCPU pointer
 519  * @vintid: The INTID of the interrupt
 520  *
 521  * Reset the active and pending states of a mapped interrupt.  Kernel
 522  * subsystems injecting mapped interrupts should reset their interrupt lines
 523  * when we are doing a reset of the VM.
 524  */
 525 void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
 526 {
 527         struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 528         unsigned long flags;
 529
 530         if (!irq->hw)
 531                 goto out;
 532
 533         raw_spin_lock_irqsave(&irq->irq_lock, flags);
 534         irq->active = false;
 535         irq->pending_latch = false;
 536         irq->line_level = false;
 537         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 538 out:
 539         vgic_put_irq(vcpu->kvm, irq);
 540 }
 541
 542 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
 543 {
 544         struct vgic_irq *irq;
 545         unsigned long flags;
 546
 547         if (!vgic_initialized(vcpu->kvm))
 548                 return -EAGAIN;
 549
 550         irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 551         BUG_ON(!irq);
 552
 553         raw_spin_lock_irqsave(&irq->irq_lock, flags);
 554         kvm_vgic_unmap_irq(irq);
 555         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 556         vgic_put_irq(vcpu->kvm, irq);
 557
 558         return 0;
 559 }
 560
 561 /**
 562  * kvm_vgic_set_owner - Set the owner of an interrupt for a VM
 563  *
 564  * @vcpu:   Pointer to the VCPU (used for PPIs)
 565  * @intid:  The virtual INTID identifying the interrupt (PPI or SPI)
 566  * @owner:  Opaque pointer to the owner
 567  *
 568  * Returns 0 if intid is not already used by another in-kernel device and the
 569  * owner is set, otherwise returns an error code.
 570  */
 571 int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
 572 {
 573         struct vgic_irq *irq;
 574         unsigned long flags;
 575         int ret = 0;
 576
 577         if (!vgic_initialized(vcpu->kvm))
 578                 return -EAGAIN;
 579
 580         /* SGIs and LPIs cannot be wired up to any device */
 581         if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid))
 582                 return -EINVAL;
 583
 584         irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
 585         raw_spin_lock_irqsave(&irq->irq_lock, flags);
 586         if (irq->owner && irq->owner != owner)
 587                 ret = -EEXIST;
 588         else
 589                 irq->owner = owner;
 590         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 591
 592         return ret;
 593 }
 594
/**
 * vgic_prune_ap_list - Remove non-relevant interrupts from the list
 *
 * @vcpu: The VCPU pointer
 *
 * Go over the list of "interesting" interrupts, and prune those that we
 * won't have to consider in the near future.
 *
 * Interrupts whose target has become another VCPU are migrated to that
 * VCPU's ap_list, and the walk is restarted from the beginning after each
 * migration attempt. Expects to be called with interrupts disabled.
 */
static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq, *tmp;

        DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());

retry:
        raw_spin_lock(&vgic_cpu->ap_list_lock);

        list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
                struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
                bool target_vcpu_needs_kick = false;

                raw_spin_lock(&irq->irq_lock);

                /* Everything on this list must belong to this VCPU. */
                BUG_ON(vcpu != irq->vcpu);

                target_vcpu = vgic_target_oracle(irq);

                if (!target_vcpu) {
                        /*
                         * We don't need to process this interrupt any
                         * further, move it off the list.
                         */
                        list_del(&irq->ap_list);
                        irq->vcpu = NULL;
                        raw_spin_unlock(&irq->irq_lock);

                        /*
                         * This vgic_put_irq call matches the
                         * vgic_get_irq_kref in vgic_queue_irq_unlock,
                         * where we added the LPI to the ap_list. As
                         * we remove the irq from the list, we also
                         * drop the refcount.
                         */
                        vgic_put_irq(vcpu->kvm, irq);
                        continue;
                }

                if (target_vcpu == vcpu) {
                        /* We're on the right CPU */
                        raw_spin_unlock(&irq->irq_lock);
                        continue;
                }

                /* This interrupt looks like it has to be migrated. */

                /* Drop everything so both ap_list_locks can be taken in order. */
                raw_spin_unlock(&irq->irq_lock);
                raw_spin_unlock(&vgic_cpu->ap_list_lock);

                /*
                 * Ensure locking order by always locking the smallest
                 * ID first.
                 */
                if (vcpu->vcpu_id < target_vcpu->vcpu_id) {
                        vcpuA = vcpu;
                        vcpuB = target_vcpu;
                } else {
                        vcpuA = target_vcpu;
                        vcpuB = vcpu;
                }

                raw_spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
                raw_spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
                                      SINGLE_DEPTH_NESTING);
                raw_spin_lock(&irq->irq_lock);

                /*
                 * If the affinity has been preserved, move the
                 * interrupt around. Otherwise, it means things have
                 * changed while the interrupt was unlocked, and we
                 * need to replay this.
                 *
                 * In all cases, we cannot trust the list not to have
                 * changed, so we restart from the beginning.
                 */
                if (target_vcpu == vgic_target_oracle(irq)) {
                        struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;

                        list_del(&irq->ap_list);
                        irq->vcpu = target_vcpu;
                        list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
                        target_vcpu_needs_kick = true;
                }

                /* Release in the reverse order of acquisition. */
                raw_spin_unlock(&irq->irq_lock);
                raw_spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
                raw_spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);

                /* Only kick the new target if the IRQ actually moved. */
                if (target_vcpu_needs_kick) {
                        kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
                        kvm_vcpu_kick(target_vcpu);
                }

                goto retry;
        }

        raw_spin_unlock(&vgic_cpu->ap_list_lock);
}
 703
 704 static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
 705 {
 706         if (kvm_vgic_global_state.type == VGIC_V2)
 707                 vgic_v2_fold_lr_state(vcpu);
 708         else
 709                 vgic_v3_fold_lr_state(vcpu);
 710 }
 711
 712 /* Requires the irq_lock to be held. */
 713 static inline void vgic_populate_lr(struct kvm_vcpu *vcpu,
 714                                     struct vgic_irq *irq, int lr)
 715 {
 716         lockdep_assert_held(&irq->irq_lock);
 717
 718         if (kvm_vgic_global_state.type == VGIC_V2)
 719                 vgic_v2_populate_lr(vcpu, irq, lr);
 720         else
 721                 vgic_v3_populate_lr(vcpu, irq, lr);
 722 }
 723
 724 static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
 725 {
 726         if (kvm_vgic_global_state.type == VGIC_V2)
 727                 vgic_v2_clear_lr(vcpu, lr);
 728         else
 729                 vgic_v3_clear_lr(vcpu, lr);
 730 }
 731
 732 static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
 733 {
 734         if (kvm_vgic_global_state.type == VGIC_V2)
 735                 vgic_v2_set_underflow(vcpu);
 736         else
 737                 vgic_v3_set_underflow(vcpu);
 738 }
 739
 740 /* Requires the ap_list_lock to be held. */
 741 static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
 742                                  bool *multi_sgi)
 743 {
 744         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 745         struct vgic_irq *irq;
 746         int count = 0;
 747
 748         *multi_sgi = false;
 749
 750         lockdep_assert_held(&vgic_cpu->ap_list_lock);
 751
 752         list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
 753                 int w;
 754
 755                 raw_spin_lock(&irq->irq_lock);
 756                 /* GICv2 SGIs can count for more than one... */
 757                 w = vgic_irq_get_lr_count(irq);
 758                 raw_spin_unlock(&irq->irq_lock);
 759
 760                 count += w;
 761                 *multi_sgi |= (w > 1);
 762         }
 763         return count;
 764 }
 765
 766 /* Requires the VCPU's ap_list_lock to be held. */
 767 static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
 768 {
 769         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 770         struct vgic_irq *irq;
 771         int count;
 772         bool multi_sgi;
 773         u8 prio = 0xff;
 774
 775         lockdep_assert_held(&vgic_cpu->ap_list_lock);
 776
 777         count = compute_ap_list_depth(vcpu, &multi_sgi);
 778         if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
 779                 vgic_sort_ap_list(vcpu);
 780
 781         count = 0;
 782
 783         list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
 784                 raw_spin_lock(&irq->irq_lock);
 785
 786                 /*
 787                  * If we have multi-SGIs in the pipeline, we need to
 788                  * guarantee that they are all seen before any IRQ of
 789                  * lower priority. In that case, we need to filter out
 790                  * these interrupts by exiting early. This is easy as
 791                  * the AP list has been sorted already.
 792                  */
 793                 if (multi_sgi && irq->priority > prio) {
 794                         _raw_spin_unlock(&irq->irq_lock);
 795                         break;
 796                 }
 797
 798                 if (likely(vgic_target_oracle(irq) == vcpu)) {
 799                         vgic_populate_lr(vcpu, irq, count++);
 800
 801                         if (irq->source)
 802                                 prio = irq->priority;
 803                 }
 804
 805                 raw_spin_unlock(&irq->irq_lock);
 806
 807                 if (count == kvm_vgic_global_state.nr_lr) {
 808                         if (!list_is_last(&irq->ap_list,
 809                                           &vgic_cpu->ap_list_head))
 810                                 vgic_set_underflow(vcpu);
 811                         break;
 812                 }
 813         }
 814
 815         vcpu->arch.vgic_cpu.used_lrs = count;
 816
 817         /* Nuke remaining LRs */
 818         for ( ; count < kvm_vgic_global_state.nr_lr; count++)
 819                 vgic_clear_lr(vcpu, count);
 820 }
 821
 822 static inline bool can_access_vgic_from_kernel(void)
 823 {
 824         /*
 825          * GICv2 can always be accessed from the kernel because it is
 826          * memory-mapped, and VHE systems can access GICv3 EL2 system
 827          * registers.
 828          */
 829         return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe();
 830 }
 831
 832 static inline void vgic_save_state(struct kvm_vcpu *vcpu)
 833 {
 834         if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
 835                 vgic_v2_save_state(vcpu);
 836         else
 837                 __vgic_v3_save_state(vcpu);
 838 }
 839
 840 /* Sync back the hardware VGIC state into our emulation after a guest's run. */
 841 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 842 {
 843         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 844
 845         WARN_ON(vgic_v4_sync_hwstate(vcpu));
 846
 847         /* An empty ap_list_head implies used_lrs == 0 */
 848         if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
 849                 return;
 850
 851         if (can_access_vgic_from_kernel())
 852                 vgic_save_state(vcpu);
 853
 854         if (vgic_cpu->used_lrs)
 855                 vgic_fold_lr_state(vcpu);
 856         vgic_prune_ap_list(vcpu);
 857 }
 858
 859 static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
 860 {
 861         if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
 862                 vgic_v2_restore_state(vcpu);
 863         else
 864                 __vgic_v3_restore_state(vcpu);
 865 }
 866
 867 /* Flush our emulation state into the GIC hardware before entering the guest. */
 868 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 869 {
 870         WARN_ON(vgic_v4_flush_hwstate(vcpu));
 871
 872         /*
 873          * If there are no virtual interrupts active or pending for this
 874          * VCPU, then there is no work to do and we can bail out without
 875          * taking any lock.  There is a potential race with someone injecting
 876          * interrupts to the VCPU, but it is a benign race as the VCPU will
 877          * either observe the new interrupt before or after doing this check,
 878          * and introducing additional synchronization mechanism doesn't change
 879          * this.
 880          *
 881          * Note that we still need to go through the whole thing if anything
 882          * can be directly injected (GICv4).
 883          */
 884         if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) &&
 885             !vgic_supports_direct_msis(vcpu->kvm))
 886                 return;
 887
 888         DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
 889
 890         if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) {
 891                 raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
 892                 vgic_flush_lr_state(vcpu);
 893                 raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
 894         }
 895
 896         if (can_access_vgic_from_kernel())
 897                 vgic_restore_state(vcpu);
 898 }
 899
 900 void kvm_vgic_load(struct kvm_vcpu *vcpu)
 901 {
 902         if (unlikely(!vgic_initialized(vcpu->kvm)))
 903                 return;
 904
 905         if (kvm_vgic_global_state.type == VGIC_V2)
 906                 vgic_v2_load(vcpu);
 907         else
 908                 vgic_v3_load(vcpu);
 909 }
 910
 911 void kvm_vgic_put(struct kvm_vcpu *vcpu)
 912 {
 913         if (unlikely(!vgic_initialized(vcpu->kvm)))
 914                 return;
 915
 916         if (kvm_vgic_global_state.type == VGIC_V2)
 917                 vgic_v2_put(vcpu);
 918         else
 919                 vgic_v3_put(vcpu);
 920 }
 921
 922 void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu)
 923 {
 924         if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
 925                 return;
 926
 927         if (kvm_vgic_global_state.type == VGIC_V2)
 928                 vgic_v2_vmcr_sync(vcpu);
 929         else
 930                 vgic_v3_vmcr_sync(vcpu);
 931 }
 932
 933 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
 934 {
 935         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 936         struct vgic_irq *irq;
 937         bool pending = false;
 938         unsigned long flags;
 939         struct vgic_vmcr vmcr;
 940
 941         if (!vcpu->kvm->arch.vgic.enabled)
 942                 return false;
 943
 944         if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
 945                 return true;
 946
 947         vgic_get_vmcr(vcpu, &vmcr);
 948
 949         raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
 950
 951         list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
 952                 raw_spin_lock(&irq->irq_lock);
 953                 pending = irq_is_pending(irq) && irq->enabled &&
 954                           !irq->active &&
 955                           irq->priority < vmcr.pmr;
 956                 raw_spin_unlock(&irq->irq_lock);
 957
 958                 if (pending)
 959                         break;
 960         }
 961
 962         raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
 963
 964         return pending;
 965 }
 966
 967 void vgic_kick_vcpus(struct kvm *kvm)
 968 {
 969         struct kvm_vcpu *vcpu;
 970         int c;
 971
 972         /*
 973          * We've injected an interrupt, time to find out who deserves
 974          * a good kick...
 975          */
 976         kvm_for_each_vcpu(c, vcpu, kvm) {
 977                 if (kvm_vgic_vcpu_pending_irq(vcpu)) {
 978                         kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
 979                         kvm_vcpu_kick(vcpu);
 980                 }
 981         }
 982 }
 983
 984 bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
 985 {
 986         struct vgic_irq *irq;
 987         bool map_is_active;
 988         unsigned long flags;
 989
 990         if (!vgic_initialized(vcpu->kvm))
 991                 return false;
 992
 993         irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 994         raw_spin_lock_irqsave(&irq->irq_lock, flags);
 995         map_is_active = irq->hw && irq->active;
 996         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 997         vgic_put_irq(vcpu->kvm, irq);
 998
 999         return map_is_active;
1000 }