virt/kvm/arm/vgic/vgic.c

   1 /*
   2  * Copyright (C) 2015, 2016 ARM Ltd.
   3  *
   4  * This program is free software; you can redistribute it and/or modify
   5  * it under the terms of the GNU General Public License version 2 as
   6  * published by the Free Software Foundation.
   7  *
   8  * This program is distributed in the hope that it will be useful,
   9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11  * GNU General Public License for more details.
  12  *
  13  * You should have received a copy of the GNU General Public License
  14  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  15  */
  16
  17 #include <linux/interrupt.h>
  18 #include <linux/irq.h>
  19 #include <linux/kvm.h>
  20 #include <linux/kvm_host.h>
  21 #include <linux/list_sort.h>
  22 #include <linux/nospec.h>
  23
  24 #include <asm/kvm_hyp.h>
  25
  26 #include "vgic.h"
  27
  28 #define CREATE_TRACE_POINTS
  29 #include "trace.h"
  30
  31 struct vgic_global kvm_vgic_global_state __ro_after_init = {
             /*
              * Static key tested by the save/restore helpers in this file to
              * choose between the GICv2 and GICv3 paths; it starts out false.
              */
  32         .gicv3_cpuif = STATIC_KEY_FALSE_INIT,
  33 };
  34
  35 /*
  36  * Locking order is always:
  37  * kvm->lock (mutex)
  38  *   its->cmd_lock (mutex)
  39  *     its->its_lock (mutex)
  40  *       vgic_cpu->ap_list_lock         must be taken with IRQs disabled
  41  *         kvm->lpi_list_lock           must be taken with IRQs disabled
  42  *           vgic_irq->irq_lock         must be taken with IRQs disabled
  43  *
  44  * As the ap_list_lock might be taken from the timer interrupt handler,
  45  * we have to disable IRQs before taking this lock and everything lower
  46  * than it.
  47  *
  48  * If you need to take multiple locks, always take the upper lock first,
  49  * then the lower ones, e.g. first take the its_lock, then the irq_lock.
  50  * If you are already holding a lock and need to take a higher one, you
  51  * have to drop the lower ranking lock first and re-acquire it after having
  52  * taken the upper one.
  53  *
  54  * When taking more than one ap_list_lock at the same time, always take the
  55  * lowest numbered VCPU's ap_list_lock first, so:
  56  *   vcpuX->vcpu_id < vcpuY->vcpu_id:
  57  *     raw_spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
  58  *     raw_spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
  59  *
  60  * Since the VGIC must support injecting virtual interrupts from ISRs, we have
  61  * to use the raw_spin_lock_irqsave/raw_spin_unlock_irqrestore versions of outer
  62  * spinlocks for any lock that may be taken while injecting an interrupt.
  63  */
  64
  65 /*
  66  * Iterate over the VM's list of mapped LPIs to find the one with a
  67  * matching interrupt ID and return a reference to the IRQ structure.
  68  */
  69 static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
  70 {
  71         struct vgic_dist *dist = &kvm->arch.vgic;
  72         struct vgic_irq *irq = NULL;
  73         unsigned long flags;
  74
  75         raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
  76
  77         list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
  78                 if (irq->intid != intid)
  79                         continue;
  80
  81                 /*
  82                  * This increases the refcount, the caller is expected to
  83                  * call vgic_put_irq() later once it's finished with the IRQ.
  84                  */
  85                 vgic_get_irq_kref(irq);
  86                 goto out_unlock;
  87         }
  88         irq = NULL;
  89
  90 out_unlock:
  91         raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
  92
  93         return irq;
  94 }
  95
  96 /*
  97  * This looks up the virtual interrupt ID to get the corresponding
  98  * struct vgic_irq. It also increases the refcount, so any caller is expected
  99  * to call vgic_put_irq() once it's finished with this IRQ.
 100  */
 101 struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
 102                               u32 intid)
 103 {
 104         /* SGIs and PPIs */
 105         if (intid <= VGIC_MAX_PRIVATE) {
 106                 intid = array_index_nospec(intid, VGIC_MAX_PRIVATE + 1);
 107                 return &vcpu->arch.vgic_cpu.private_irqs[intid];
 108         }
 109
 110         /* SPIs */
 111         if (intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) {
 112                 intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS);
 113                 return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
 114         }
 115
 116         /* LPIs */
 117         if (intid >= VGIC_MIN_LPI)
 118                 return vgic_get_lpi(kvm, intid);
 119
 120         WARN(1, "Looking up struct vgic_irq for reserved INTID");
 121         return NULL;
 122 }
 123
 124 /*
 125  * We can't do anything in here, because we lack the kvm pointer to
 126  * lock and remove the item from the lpi_list. So we keep this function
 127  * empty and use the return value of kref_put() to trigger the freeing.
 128  */
 129 static void vgic_irq_release(struct kref *ref)
 130 {
 131 }
 132
 133 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
 134 {
 135         struct vgic_dist *dist = &kvm->arch.vgic;
 136         unsigned long flags;
 137
 138         if (irq->intid < VGIC_MIN_LPI)
 139                 return;
 140
 141         raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
 142         if (!kref_put(&irq->refcount, vgic_irq_release)) {
 143                 raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
 144                 return;
 145         };
 146
 147         list_del(&irq->lpi_list);
 148         dist->lpi_list_count--;
 149         raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
 150
 151         kfree(irq);
 152 }
 153
 154 void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
 155 {
 156         WARN_ON(irq_set_irqchip_state(irq->host_irq,
 157                                       IRQCHIP_STATE_PENDING,
 158                                       pending));
 159 }
 160
 161 bool vgic_get_phys_line_level(struct vgic_irq *irq)
 162 {
 163         bool line_level;
 164
 165         BUG_ON(!irq->hw);
 166
 167         if (irq->get_input_level)
 168                 return irq->get_input_level(irq->intid);
 169
 170         WARN_ON(irq_get_irqchip_state(irq->host_irq,
 171                                       IRQCHIP_STATE_PENDING,
 172                                       &line_level));
 173         return line_level;
 174 }
 175
 176 /* Set/Clear the physical active state */
 177 void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
 178 {
 179
 180         BUG_ON(!irq->hw);
 181         WARN_ON(irq_set_irqchip_state(irq->host_irq,
 182                                       IRQCHIP_STATE_ACTIVE,
 183                                       active));
 184 }
 185
 186 /**
 187  * kvm_vgic_target_oracle - compute the target vcpu for an irq
 188  *
 189  * @irq:        The irq to route. Must be already locked.
 190  *
 191  * Based on the current state of the interrupt (enabled, pending,
 192  * active, vcpu and target_vcpu), compute the next vcpu this should be
 193  * given to. Return NULL if this shouldn't be injected at all.
 194  *
 195  * Requires the IRQ lock to be held.
 196  */
 197 static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
 198 {
 199         lockdep_assert_held(&irq->irq_lock);
 200
 201         /* If the interrupt is active, it must stay on the current vcpu */
 202         if (irq->active)
 203                 return irq->vcpu ? : irq->target_vcpu;
 204
 205         /*
 206          * If the IRQ is not active but enabled and pending, we should direct
 207          * it to its configured target VCPU.
 208          * If the distributor is disabled, pending interrupts shouldn't be
 209          * forwarded.
 210          */
 211         if (irq->enabled && irq_is_pending(irq)) {
 212                 if (unlikely(irq->target_vcpu &&
 213                              !irq->target_vcpu->kvm->arch.vgic.enabled))
 214                         return NULL;
 215
 216                 return irq->target_vcpu;
 217         }
 218
 219         /* If neither active nor pending and enabled, then this IRQ should not
 220          * be queued to any VCPU.
 221          */
 222         return NULL;
 223 }
 224
 225 /*
 226  * The order of items in the ap_lists defines how we'll pack things in LRs as
 227  * well, the first items in the list being the first things populated in the
 228  * LRs.
 229  *
 230  * A hard rule is that active interrupts can never be pushed out of the LRs
 231  * (and therefore take priority) since we cannot reliably trap on deactivation
 232  * of IRQs and therefore they have to be present in the LRs.
 233  *
 234  * Otherwise things should be sorted by the priority field and the GIC
 235  * hardware support will take care of preemption of priority groups etc.
 236  *
 237  * Return negative if "a" sorts before "b", 0 to preserve order, and positive
 238  * to sort "b" before "a".
 239  */
 240 static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b)
 241 {
 242         struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
 243         struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
 244         bool penda, pendb;
 245         int ret;
 246
 247         raw_spin_lock(&irqa->irq_lock);
 248         raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
 249
 250         if (irqa->active || irqb->active) {
 251                 ret = (int)irqb->active - (int)irqa->active;
 252                 goto out;
 253         }
 254
 255         penda = irqa->enabled && irq_is_pending(irqa);
 256         pendb = irqb->enabled && irq_is_pending(irqb);
 257
 258         if (!penda || !pendb) {
 259                 ret = (int)pendb - (int)penda;
 260                 goto out;
 261         }
 262
 263         /* Both pending and enabled, sort by priority */
 264         ret = irqa->priority - irqb->priority;
 265 out:
 266         raw_spin_unlock(&irqb->irq_lock);
 267         raw_spin_unlock(&irqa->irq_lock);
 268         return ret;
 269 }
 270
 271 /* Must be called with the ap_list_lock held */
 272 static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
 273 {
 274         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 275
 276         lockdep_assert_held(&vgic_cpu->ap_list_lock);
 277
 278         list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
 279 }
 280
 281 /*
 282  * Only valid injection if changing level for level-triggered IRQs or for a
 283  * rising edge, and in-kernel connected IRQ lines can only be controlled by
 284  * their owner.
 285  */
 286 static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner)
 287 {
 288         if (irq->owner != owner)
 289                 return false;
 290
 291         switch (irq->config) {
 292         case VGIC_CONFIG_LEVEL:
 293                 return irq->line_level != level;
 294         case VGIC_CONFIG_EDGE:
 295                 return level;
 296         }
 297
 298         return false;
 299 }
 300
 301 /*
 302  * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
 303  * Do the queuing if necessary, taking the right locks in the right order.
 304  * Returns true when the IRQ was queued, false otherwise.
 305  *
 306  * Needs to be entered with the IRQ lock already held, but will return
 307  * with all locks dropped.
 308  */
 309 bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
 310                            unsigned long flags)
 311 {
 312         struct kvm_vcpu *vcpu;
 313
 314         lockdep_assert_held(&irq->irq_lock);
 315
 316 retry:
 317         vcpu = vgic_target_oracle(irq);
 318         if (irq->vcpu || !vcpu) {
 319                 /*
 320                  * If this IRQ is already on a VCPU's ap_list, then it
 321                  * cannot be moved or modified and there is no more work for
 322                  * us to do.
 323                  *
 324                  * Otherwise, if the irq is not pending and enabled, it does
 325                  * not need to be inserted into an ap_list and there is also
 326                  * no more work for us to do.
 327                  */
 328                 raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 329
 330                 /*
 331                  * We have to kick the VCPU here, because we could be
 332                  * queueing an edge-triggered interrupt for which we
 333                  * get no EOI maintenance interrupt. In that case,
 334                  * while the IRQ is already on the VCPU's AP list, the
 335                  * VCPU could have EOI'ed the original interrupt and
 336                  * won't see this one until it exits for some other
 337                  * reason.
 338                  */
 339                 if (vcpu) {
 340                         kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
 341                         kvm_vcpu_kick(vcpu);
 342                 }
 343                 return false;
 344         }
 345
 346         /*
 347          * We must unlock the irq lock to take the ap_list_lock where
 348          * we are going to insert this new pending interrupt.
 349          */
 350         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 351
 352         /* someone can do stuff here, which we re-check below */
 353
 354         raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
 355         raw_spin_lock(&irq->irq_lock);
 356
 357         /*
 358          * Did something change behind our backs?
 359          *
 360          * There are two cases:
 361          * 1) The irq lost its pending state or was disabled behind our
 362          *    backs and/or it was queued to another VCPU's ap_list.
 363          * 2) Someone changed the affinity on this irq behind our
 364          *    backs and we are now holding the wrong ap_list_lock.
 365          *
 366          * In both cases, drop the locks and retry.
 367          */
 368
 369         if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
 370                 raw_spin_unlock(&irq->irq_lock);
 371                 raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock,
 372                                            flags);
 373
 374                 raw_spin_lock_irqsave(&irq->irq_lock, flags);
 375                 goto retry;
 376         }
 377
 378         /*
 379          * Grab a reference to the irq to reflect the fact that it is
 380          * now in the ap_list.
 381          */
 382         vgic_get_irq_kref(irq);
 383         list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
 384         irq->vcpu = vcpu;
 385
 386         raw_spin_unlock(&irq->irq_lock);
 387         raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
 388
 389         kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
 390         kvm_vcpu_kick(vcpu);
 391
 392         return true;
 393 }
 394
 395 /**
 396  * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
 397  * @kvm:     The VM structure pointer
 398  * @cpuid:   The CPU for PPIs
 399  * @intid:   The INTID to inject a new state to.
 400  * @level:   Edge-triggered:  true:  to trigger the interrupt
 401  *                            false: to ignore the call
 402  *           Level-sensitive  true:  raise the input signal
 403  *                            false: lower the input signal
 404  * @owner:   The opaque pointer to the owner of the IRQ being raised to verify
 405  *           that the caller is allowed to inject this IRQ.  Userspace
 406  *           injections will have owner == NULL.
 407  *
 408  * The VGIC is not concerned with devices being active-LOW or active-HIGH for
 409  * level-sensitive interrupts.  You can think of the level parameter as 1
 410  * being HIGH and 0 being LOW and all devices being active-HIGH.
 411  */
 412 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
 413                         bool level, void *owner)
 414 {
 415         struct kvm_vcpu *vcpu;
 416         struct vgic_irq *irq;
 417         unsigned long flags;
 418         int ret;
 419
 420         trace_vgic_update_irq_pending(cpuid, intid, level);
 421
 422         ret = vgic_lazy_init(kvm);
 423         if (ret)
 424                 return ret;
 425
 426         vcpu = kvm_get_vcpu(kvm, cpuid);
 427         if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
 428                 return -EINVAL;
 429
 430         irq = vgic_get_irq(kvm, vcpu, intid);
 431         if (!irq)
 432                 return -EINVAL;
 433
 434         raw_spin_lock_irqsave(&irq->irq_lock, flags);
 435
 436         if (!vgic_validate_injection(irq, level, owner)) {
 437                 /* Nothing to see here, move along... */
 438                 raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 439                 vgic_put_irq(kvm, irq);
 440                 return 0;
 441         }
 442
 443         if (irq->config == VGIC_CONFIG_LEVEL)
 444                 irq->line_level = level;
 445         else
 446                 irq->pending_latch = true;
 447
 448         vgic_queue_irq_unlock(kvm, irq, flags);
 449         vgic_put_irq(kvm, irq);
 450
 451         return 0;
 452 }
 453
 454 /* @irq->irq_lock must be held */
 455 static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
 456                             unsigned int host_irq,
 457                             bool (*get_input_level)(int vindid))
 458 {
 459         struct irq_desc *desc;
 460         struct irq_data *data;
 461
 462         /*
 463          * Find the physical IRQ number corresponding to @host_irq
 464          */
 465         desc = irq_to_desc(host_irq);
 466         if (!desc) {
 467                 kvm_err("%s: no interrupt descriptor\n", __func__);
 468                 return -EINVAL;
 469         }
 470         data = irq_desc_get_irq_data(desc);
 471         while (data->parent_data)
 472                 data = data->parent_data;
 473
 474         irq->hw = true;
 475         irq->host_irq = host_irq;
 476         irq->hwintid = data->hwirq;
 477         irq->get_input_level = get_input_level;
 478         return 0;
 479 }
 480
 481 /* @irq->irq_lock must be held */
 482 static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
 483 {
 484         irq->hw = false;
 485         irq->hwintid = 0;
 486         irq->get_input_level = NULL;
 487 }
 488
 489 int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
 490                           u32 vintid, bool (*get_input_level)(int vindid))
 491 {
 492         struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 493         unsigned long flags;
 494         int ret;
 495
 496         BUG_ON(!irq);
 497
 498         raw_spin_lock_irqsave(&irq->irq_lock, flags);
 499         ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level);
 500         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 501         vgic_put_irq(vcpu->kvm, irq);
 502
 503         return ret;
 504 }
 505
 506 /**
 507  * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ
 508  * @vcpu: The VCPU pointer
 509  * @vintid: The INTID of the interrupt
 510  *
 511  * Reset the active and pending states of a mapped interrupt.  Kernel
 512  * subsystems injecting mapped interrupts should reset their interrupt lines
 513  * when we are doing a reset of the VM.
 514  */
 515 void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
 516 {
 517         struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 518         unsigned long flags;
 519
 520         if (!irq->hw)
 521                 goto out;
 522
 523         raw_spin_lock_irqsave(&irq->irq_lock, flags);
 524         irq->active = false;
 525         irq->pending_latch = false;
 526         irq->line_level = false;
 527         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 528 out:
 529         vgic_put_irq(vcpu->kvm, irq);
 530 }
 531
 532 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
 533 {
 534         struct vgic_irq *irq;
 535         unsigned long flags;
 536
 537         if (!vgic_initialized(vcpu->kvm))
 538                 return -EAGAIN;
 539
 540         irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 541         BUG_ON(!irq);
 542
 543         raw_spin_lock_irqsave(&irq->irq_lock, flags);
 544         kvm_vgic_unmap_irq(irq);
 545         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 546         vgic_put_irq(vcpu->kvm, irq);
 547
 548         return 0;
 549 }
 550
 551 /**
 552  * kvm_vgic_set_owner - Set the owner of an interrupt for a VM
 553  *
 554  * @vcpu:   Pointer to the VCPU (used for PPIs)
 555  * @intid:  The virtual INTID identifying the interrupt (PPI or SPI)
 556  * @owner:  Opaque pointer to the owner
 557  *
 558  * Returns 0 if intid is not already used by another in-kernel device and the
 559  * owner is set, otherwise returns an error code.
 560  */
 561 int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
 562 {
 563         struct vgic_irq *irq;
 564         unsigned long flags;
 565         int ret = 0;
 566
 567         if (!vgic_initialized(vcpu->kvm))
 568                 return -EAGAIN;
 569
 570         /* SGIs and LPIs cannot be wired up to any device */
 571         if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid))
 572                 return -EINVAL;
 573
 574         irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
 575         raw_spin_lock_irqsave(&irq->irq_lock, flags);
 576         if (irq->owner && irq->owner != owner)
 577                 ret = -EEXIST;
 578         else
 579                 irq->owner = owner;
 580         raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 581
 582         return ret;
 583 }
 584
 585 /**
 586  * vgic_prune_ap_list - Remove non-relevant interrupts from the list
 587  *
 588  * @vcpu: The VCPU pointer
 589  *
 590  * Go over the list of "interesting" interrupts, and prune those that we
 591  * won't have to consider in the near future.
 592  */
 593 static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
 594 {
 595         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 596         struct vgic_irq *irq, *tmp;
 597
 598         DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
 599
 600 retry:
 601         raw_spin_lock(&vgic_cpu->ap_list_lock);
 602
 603         list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
 604                 struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
 605                 bool target_vcpu_needs_kick = false;
 606
 607                 raw_spin_lock(&irq->irq_lock);
 608
 609                 BUG_ON(vcpu != irq->vcpu);
 610
 611                 target_vcpu = vgic_target_oracle(irq);
 612
 613                 if (!target_vcpu) {
 614                         /*
 615                          * We don't need to process this interrupt any
 616                          * further, move it off the list.
 617                          */
 618                         list_del(&irq->ap_list);
 619                         irq->vcpu = NULL;
 620                         raw_spin_unlock(&irq->irq_lock);
 621
 622                         /*
 623                          * This vgic_put_irq call matches the
 624                          * vgic_get_irq_kref in vgic_queue_irq_unlock,
 625                          * where we added the LPI to the ap_list. As
 626                          * we remove the irq from the list, we drop
 627                          * also drop the refcount.
 628                          */
 629                         vgic_put_irq(vcpu->kvm, irq);
 630                         continue;
 631                 }
 632
 633                 if (target_vcpu == vcpu) {
 634                         /* We're on the right CPU */
 635                         raw_spin_unlock(&irq->irq_lock);
 636                         continue;
 637                 }
 638
 639                 /* This interrupt looks like it has to be migrated. */
 640
 641                 raw_spin_unlock(&irq->irq_lock);
 642                 raw_spin_unlock(&vgic_cpu->ap_list_lock);
 643
 644                 /*
 645                  * Ensure locking order by always locking the smallest
 646                  * ID first.
 647                  */
 648                 if (vcpu->vcpu_id < target_vcpu->vcpu_id) {
 649                         vcpuA = vcpu;
 650                         vcpuB = target_vcpu;
 651                 } else {
 652                         vcpuA = target_vcpu;
 653                         vcpuB = vcpu;
 654                 }
 655
 656                 raw_spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
 657                 raw_spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
 658                                       SINGLE_DEPTH_NESTING);
 659                 raw_spin_lock(&irq->irq_lock);
 660
 661                 /*
 662                  * If the affinity has been preserved, move the
 663                  * interrupt around. Otherwise, it means things have
 664                  * changed while the interrupt was unlocked, and we
 665                  * need to replay this.
 666                  *
 667                  * In all cases, we cannot trust the list not to have
 668                  * changed, so we restart from the beginning.
 669                  */
 670                 if (target_vcpu == vgic_target_oracle(irq)) {
 671                         struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;
 672
 673                         list_del(&irq->ap_list);
 674                         irq->vcpu = target_vcpu;
 675                         list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
 676                         target_vcpu_needs_kick = true;
 677                 }
 678
 679                 raw_spin_unlock(&irq->irq_lock);
 680                 raw_spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
 681                 raw_spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);
 682
 683                 if (target_vcpu_needs_kick) {
 684                         kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
 685                         kvm_vcpu_kick(target_vcpu);
 686                 }
 687
 688                 goto retry;
 689         }
 690
 691         raw_spin_unlock(&vgic_cpu->ap_list_lock);
 692 }
 693
 694 static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
 695 {
 696         if (kvm_vgic_global_state.type == VGIC_V2)
 697                 vgic_v2_fold_lr_state(vcpu);
 698         else
 699                 vgic_v3_fold_lr_state(vcpu);
 700 }
 701
 702 /* Requires the irq_lock to be held. */
 703 static inline void vgic_populate_lr(struct kvm_vcpu *vcpu,
 704                                     struct vgic_irq *irq, int lr)
 705 {
 706         lockdep_assert_held(&irq->irq_lock);
 707
 708         if (kvm_vgic_global_state.type == VGIC_V2)
 709                 vgic_v2_populate_lr(vcpu, irq, lr);
 710         else
 711                 vgic_v3_populate_lr(vcpu, irq, lr);
 712 }
 713
 714 static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
 715 {
 716         if (kvm_vgic_global_state.type == VGIC_V2)
 717                 vgic_v2_clear_lr(vcpu, lr);
 718         else
 719                 vgic_v3_clear_lr(vcpu, lr);
 720 }
 721
 722 static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
 723 {
 724         if (kvm_vgic_global_state.type == VGIC_V2)
 725                 vgic_v2_set_underflow(vcpu);
 726         else
 727                 vgic_v3_set_underflow(vcpu);
 728 }
 729
 730 /* Requires the ap_list_lock to be held. */
 731 static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
 732                                  bool *multi_sgi)
 733 {
 734         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 735         struct vgic_irq *irq;
 736         int count = 0;
 737
 738         *multi_sgi = false;
 739
 740         lockdep_assert_held(&vgic_cpu->ap_list_lock);
 741
 742         list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
 743                 int w;
 744
 745                 raw_spin_lock(&irq->irq_lock);
 746                 /* GICv2 SGIs can count for more than one... */
 747                 w = vgic_irq_get_lr_count(irq);
 748                 raw_spin_unlock(&irq->irq_lock);
 749
 750                 count += w;
 751                 *multi_sgi |= (w > 1);
 752         }
 753         return count;
 754 }
 755
 756 /* Requires the VCPU's ap_list_lock to be held. */
 757 static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
 758 {
 759         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 760         struct vgic_irq *irq;
 761         int count;
 762         bool multi_sgi;
 763         u8 prio = 0xff;
 764
 765         lockdep_assert_held(&vgic_cpu->ap_list_lock);
 766
 767         count = compute_ap_list_depth(vcpu, &multi_sgi);
 768         if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
 769                 vgic_sort_ap_list(vcpu);
 770
 771         count = 0;
 772
 773         list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
 774                 raw_spin_lock(&irq->irq_lock);
 775
 776                 /*
 777                  * If we have multi-SGIs in the pipeline, we need to
 778                  * guarantee that they are all seen before any IRQ of
 779                  * lower priority. In that case, we need to filter out
 780                  * these interrupts by exiting early. This is easy as
 781                  * the AP list has been sorted already.
 782                  */
 783                 if (multi_sgi && irq->priority > prio) {
 784                         _raw_spin_unlock(&irq->irq_lock);
 785                         break;
 786                 }
 787
 788                 if (likely(vgic_target_oracle(irq) == vcpu)) {
 789                         vgic_populate_lr(vcpu, irq, count++);
 790
 791                         if (irq->source)
 792                                 prio = irq->priority;
 793                 }
 794
 795                 raw_spin_unlock(&irq->irq_lock);
 796
 797                 if (count == kvm_vgic_global_state.nr_lr) {
 798                         if (!list_is_last(&irq->ap_list,
 799                                           &vgic_cpu->ap_list_head))
 800                                 vgic_set_underflow(vcpu);
 801                         break;
 802                 }
 803         }
 804
 805         vcpu->arch.vgic_cpu.used_lrs = count;
 806
 807         /* Nuke remaining LRs */
 808         for ( ; count < kvm_vgic_global_state.nr_lr; count++)
 809                 vgic_clear_lr(vcpu, count);
 810 }
 811
 812 static inline bool can_access_vgic_from_kernel(void)
 813 {
 814         /*
 815          * GICv2 can always be accessed from the kernel because it is
 816          * memory-mapped, and VHE systems can access GICv3 EL2 system
 817          * registers.
 818          */
 819         return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe();
 820 }
 821
 822 static inline void vgic_save_state(struct kvm_vcpu *vcpu)
 823 {
 824         if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
 825                 vgic_v2_save_state(vcpu);
 826         else
 827                 __vgic_v3_save_state(vcpu);
 828 }
 829
 830 /* After a guest's run, sync the hardware VGIC state back into our emulation. */
 831 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 832 {
 833         struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu;
 834
 835         WARN_ON(vgic_v4_sync_hwstate(vcpu));
 836
 837         /* With an empty ap_list, used_lrs is 0 and nothing needs syncing. */
 838         if (!list_empty(&cpu_if->ap_list_head)) {
 839                 if (can_access_vgic_from_kernel())
 840                         vgic_save_state(vcpu);
 841
 842                 if (cpu_if->used_lrs != 0)
 843                         vgic_fold_lr_state(vcpu);
 844
 845                 vgic_prune_ap_list(vcpu);
 846         }
 847 }
 848
 849 /* Call the GICv3 restore routine if gicv3_cpuif is set, else the GICv2 one. */
 850 static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
 851 {
 852         if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
 853                 __vgic_v3_restore_state(vcpu);
 854         else
 855                 vgic_v2_restore_state(vcpu);
 856 }
 856
 857 /* Before entering the guest, flush our emulation state into the GIC hardware. */
 858 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 859 {
 860         struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu;
 861
 862         WARN_ON(vgic_v4_flush_hwstate(vcpu));
 863
 864         /*
 865          * Lockless shortcut: when no virtual interrupt is active or pending
 866          * for this VCPU there is no work to do.  This can race with someone
 867          * injecting an interrupt to the VCPU, but benignly so: the VCPU sees
 868          * the new interrupt either before or after this check, and additional
 869          * synchronization would not change that.
 870          *
 871          * The shortcut is not taken when anything can be injected directly
 872          * (GICv4): the rest must still run then, even with an empty ap_list.
 873          */
 874         if (list_empty(&cpu_if->ap_list_head) &&
 875             !vgic_supports_direct_msis(vcpu->kvm))
 876                 return;
 877
 878         DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
 879
 880         if (!list_empty(&cpu_if->ap_list_head)) {
 881                 raw_spin_lock(&cpu_if->ap_list_lock);
 882                 vgic_flush_lr_state(vcpu);
 883                 raw_spin_unlock(&cpu_if->ap_list_lock);
 884         }
 885
 886         if (can_access_vgic_from_kernel())
 887                 vgic_restore_state(vcpu);
 888 }
 889
 890 /* Call the GICv2 or GICv3 load routine for @vcpu once the VGIC is initialized. */
 891 void kvm_vgic_load(struct kvm_vcpu *vcpu)
 892 {
 893         if (unlikely(!vgic_initialized(vcpu->kvm)))
 894                 return;
 895
 896         if (kvm_vgic_global_state.type != VGIC_V2)
 897                 vgic_v3_load(vcpu);
 898         else
 899                 vgic_v2_load(vcpu);
 900 }
 900
 901 /* Call the GICv2 or GICv3 put routine for @vcpu once the VGIC is initialized. */
 902 void kvm_vgic_put(struct kvm_vcpu *vcpu)
 903 {
 904         if (unlikely(!vgic_initialized(vcpu->kvm)))
 905                 return;
 906
 907         if (kvm_vgic_global_state.type != VGIC_V2)
 908                 vgic_v3_put(vcpu);
 909         else
 910                 vgic_v2_put(vcpu);
 911 }
 911
 912 /* Return non-zero if @vcpu has an interrupt that is ready to be taken. */
 913 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
 914 {
 915         struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu;
 916         struct vgic_vmcr vmcr;
 917         struct vgic_irq *cur;
 918         unsigned long irqflags;
 919         bool found = false;
 920
 921         if (!vcpu->kvm->arch.vgic.enabled)
 922                 return false;
 923
 924         /* A set its_vpe.pending_last is reported as pending straight away. */
 925         if (cpu_if->vgic_v3.its_vpe.pending_last)
 926                 return true;
 927
 928         vgic_get_vmcr(vcpu, &vmcr);
 929
 930         raw_spin_lock_irqsave(&cpu_if->ap_list_lock, irqflags);
 931         list_for_each_entry(cur, &cpu_if->ap_list_head, ap_list) {
 932                 /* Ready: pending, enabled, not active, priority < vmcr.pmr. */
 933                 raw_spin_lock(&cur->irq_lock);
 934                 found = irq_is_pending(cur) && cur->enabled &&
 935                         !cur->active && cur->priority < vmcr.pmr;
 936                 raw_spin_unlock(&cur->irq_lock);
 937
 938                 if (found)
 939                         break;
 940         }
 941         raw_spin_unlock_irqrestore(&cpu_if->ap_list_lock, irqflags);
 942
 943         return found;
 944 }
 945
 946 /* Kick every VCPU of @kvm for which kvm_vgic_vcpu_pending_irq() is true. */
 947 void vgic_kick_vcpus(struct kvm *kvm)
 948 {
 949         struct kvm_vcpu *target;
 950         int idx;
 951
 952         /* An interrupt was injected: find the VCPUs that need to notice. */
 953         kvm_for_each_vcpu(idx, target, kvm) {
 954                 if (!kvm_vgic_vcpu_pending_irq(target))
 955                         continue;
 956
 957                 /* Post the request first, then kick the VCPU. */
 958                 kvm_make_request(KVM_REQ_IRQ_PENDING, target);
 959                 kvm_vcpu_kick(target);
 960         }
 961 }
 962
 963 /* Report whether the vgic_irq for @vintid has both hw and active set. */
 964 bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
 965 {
 966         struct kvm *kvm = vcpu->kvm;
 967         struct vgic_irq *virq;
 968         unsigned long irqflags;
 969         bool active;
 970
 971         if (!vgic_initialized(kvm))
 972                 return false;
 973
 974         virq = vgic_get_irq(kvm, vcpu, vintid);
 975         raw_spin_lock_irqsave(&virq->irq_lock, irqflags);
 976         active = virq->hw && virq->active;
 977         raw_spin_unlock_irqrestore(&virq->irq_lock, irqflags);
 978         vgic_put_irq(kvm, virq);
 979         return active;
 980 }