Merge tag 'kvm-arm-for-v4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm...
Author:     Radim Krčmář <rkrcmar@redhat.com>
AuthorDate: Wed, 31 Jan 2018 12:34:41 +0000 (13:34 +0100)
Commit:     Radim Krčmář <rkrcmar@redhat.com>
CommitDate: Wed, 31 Jan 2018 12:34:41 +0000 (13:34 +0100)
KVM/ARM Changes for v4.16

The changes for this version include icache invalidation optimizations
(improving VM startup time), support for forwarded level-triggered
interrupts (improved performance for timers and passthrough platform
devices), a small fix for power-management notifiers, and some cosmetic
changes.

37 files changed:
Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt [deleted file]
arch/arm/include/asm/kvm_emulate.h
arch/arm/include/asm/kvm_host.h
arch/arm/include/asm/kvm_hyp.h
arch/arm/include/asm/kvm_mmu.h
arch/arm/include/asm/pgtable.h
arch/arm/kvm/hyp/switch.c
arch/arm/kvm/hyp/tlb.c
arch/arm64/include/asm/asm-uaccess.h
arch/arm64/include/asm/assembler.h
arch/arm64/include/asm/cacheflush.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_hyp.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/include/asm/pgtable-hwdef.h
arch/arm64/include/asm/pgtable-prot.h
arch/arm64/kvm/hyp/debug-sr.c
arch/arm64/kvm/hyp/switch.c
arch/arm64/kvm/hyp/tlb.c
arch/arm64/lib/clear_user.S
arch/arm64/lib/copy_from_user.S
arch/arm64/lib/copy_in_user.S
arch/arm64/lib/copy_to_user.S
arch/arm64/mm/cache.S
arch/arm64/xen/hypercall.S
include/kvm/arm_arch_timer.h
include/kvm/arm_vgic.h
virt/kvm/arm/arch_timer.c
virt/kvm/arm/arm.c
virt/kvm/arm/hyp/vgic-v2-sr.c
virt/kvm/arm/mmu.c
virt/kvm/arm/vgic/vgic-its.c
virt/kvm/arm/vgic/vgic-mmio.c
virt/kvm/arm/vgic/vgic-v2.c
virt/kvm/arm/vgic/vgic-v3.c
virt/kvm/arm/vgic/vgic.c
virt/kvm/arm/vgic/vgic.h

diff --git a/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt b/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt
deleted file mode 100644 (file)
index 38bca28..0000000
+++ /dev/null
@@ -1,187 +0,0 @@
-KVM/ARM VGIC Forwarded Physical Interrupts
-==========================================
-
-The KVM/ARM code implements software support for the ARM Generic
-Interrupt Controller's (GIC's) hardware support for virtualization by
-allowing software to inject virtual interrupts to a VM, which the guest
-OS sees as regular interrupts.  The code is famously known as the VGIC.
-
-Some of these virtual interrupts, however, correspond to physical
-interrupts from real physical devices.  One example could be the
-architected timer, which itself supports virtualization, and therefore
-lets a guest OS program the hardware device directly to raise an
-interrupt at some point in time.  When such an interrupt is raised, the
-host OS initially handles the interrupt and must somehow signal this
-event as a virtual interrupt to the guest.  Another example could be a
-passthrough device, where the physical interrupts are initially handled
-by the host, but the device driver for the device lives in the guest OS
-and KVM must therefore somehow inject a virtual interrupt on behalf of
-the physical one to the guest OS.
-
-These virtual interrupts corresponding to a physical interrupt on the
-host are called forwarded physical interrupts, but are also sometimes
-referred to as 'virtualized physical interrupts' and 'mapped interrupts'.
-
-Forwarded physical interrupts are handled slightly differently compared
-to virtual interrupts generated purely by a software emulated device.
-
-
-The HW bit
-----------
-Virtual interrupts are signalled to the guest by programming the List
-Registers (LRs) on the GIC before running a VCPU.  The LR is programmed
-with the virtual IRQ number and the state of the interrupt (Pending,
-Active, or Pending+Active).  When the guest ACKs and EOIs a virtual
-interrupt, the LR state moves from Pending to Active, and finally to
-inactive.
-
-The LRs include an extra bit, called the HW bit.  When this bit is set,
-KVM must also program an additional field in the LR, the physical IRQ
-number, to link the virtual with the physical IRQ.
-
-When the HW bit is set, KVM must EITHER set the Pending OR the Active
-bit, never both at the same time.
-
-Setting the HW bit causes the hardware to deactivate the physical
-interrupt on the physical distributor when the guest deactivates the
-corresponding virtual interrupt.
-
-
-Forwarded Physical Interrupts Life Cycle
-----------------------------------------
-
-The state of forwarded physical interrupts is managed in the following way:
-
-  - The physical interrupt is acked by the host, and becomes active on
-    the physical distributor (*).
-  - KVM sets the LR.Pending bit, because this is the only way the GICV
-    interface is going to present it to the guest.
-  - LR.Pending will stay set as long as the guest has not acked the interrupt.
-  - LR.Pending transitions to LR.Active on the guest read of the IAR, as
-    expected.
-  - On guest EOI, the *physical distributor* active bit gets cleared,
-    but the LR.Active is left untouched (set).
-  - KVM clears the LR on VM exits when the physical distributor
-    active state has been cleared.
-
-(*): The host handling is slightly more complicated.  For some forwarded
-interrupts (shared), KVM directly sets the active state on the physical
-distributor before entering the guest, because the interrupt is never actually
-handled on the host (see details on the timer as an example below).  For other
-forwarded interrupts (non-shared) the host does not deactivate the interrupt
-when the host ISR completes, but leaves the interrupt active until the guest
-deactivates it.  Leaving the interrupt active is allowed, because Linux
-configures the physical GIC with EOIMode=1, which causes EOI operations to
-perform a priority drop allowing the GIC to receive other interrupts of the
-default priority.
-
-
-Forwarded Edge and Level Triggered PPIs and SPIs
-------------------------------------------------
-Forwarded physical interrupts should always be active on the physical
-distributor when they are injected to a guest.
-
-Level-triggered interrupts will keep the interrupt line to the GIC
-asserted, typically until the guest programs the device to deassert the
-line.  This means that the interrupt will remain pending on the physical
-distributor until the guest has reprogrammed the device.  Since we
-always run the VM with interrupts enabled on the CPU, a pending
-interrupt will exit the guest as soon as we switch into the guest,
-preventing the guest from ever making progress as the process repeats
-over and over.  Therefore, the active state on the physical distributor
-must be set when entering the guest, preventing the GIC from forwarding
-the pending interrupt to the CPU.  As soon as the guest deactivates the
-interrupt, the physical line is sampled by the hardware again and the host
-takes a new interrupt if and only if the physical line is still asserted.
-
-Edge-triggered interrupts do not exhibit the same problem with
-preventing guest execution that level-triggered interrupts do.  One
-option is to not use the HW bit at all, and inject edge-triggered interrupts
-from a physical device as pure virtual interrupts.  But that would
-potentially slow down handling of the interrupt in the guest, because a
-physical interrupt occurring in the middle of the guest ISR would
-preempt the guest for the host to handle the interrupt.  Additionally,
-if you configure the system to handle interrupts on a separate physical
-core from that running your VCPU, you still have to interrupt the VCPU
-to queue the pending state onto the LR, even though the guest won't use
-this information until the guest ISR completes.  Therefore, the HW
-bit should always be set for forwarded edge-triggered interrupts.  With
-the HW bit set, the virtual interrupt is injected and additional
-physical interrupts occurring before the guest deactivates the interrupt
-simply mark the state on the physical distributor as Pending+Active.  As
-soon as the guest deactivates the interrupt, the host takes another
-interrupt if and only if there was a physical interrupt between injecting
-the forwarded interrupt to the guest and the guest deactivating the
-interrupt.
-
-Consequently, whenever we schedule a VCPU with one or more LRs with the
-HW bit set, the interrupt must also be active on the physical
-distributor.
-
-
-Forwarded LPIs
---------------
-LPIs, introduced in GICv3, are always edge-triggered and do not have an
-active state.  They become pending when a device signals them, and as
-soon as they are acked by the CPU, they are inactive again.
-
-It therefore doesn't make sense, and is not supported, to set the HW bit
-for physical LPIs that are forwarded to a VM as virtual interrupts,
-typically virtual SPIs.
-
-For LPIs, there is no other choice than to preempt the VCPU thread if
-necessary, and queue the pending state onto the LR.
-
-
-Putting It Together: The Architected Timer
-------------------------------------------
-The architected timer is a device that signals interrupts with level
-triggered semantics.  The timer hardware is directly accessed by VCPUs
-which program the timer to fire at some point in time.  Each VCPU on a
-system programs the timer to fire at different times, and therefore the
-hardware is multiplexed between multiple VCPUs.  This is implemented by
-context-switching the timer state along with each VCPU thread.
-
-However, this means that a scenario like the following is entirely
-possible, and in fact, typical:
-
-1.  KVM runs the VCPU
-2.  The guest programs the timer to fire at T+100
-3.  The guest is idle and calls WFI (wait-for-interrupts)
-4.  The hardware traps to the host
-5.  KVM stores the timer state to memory and disables the hardware timer
-6.  KVM schedules a soft timer to fire in T+(100 - time since step 2)
-7.  KVM puts the VCPU thread to sleep (on a waitqueue)
-8.  The soft timer fires, waking up the VCPU thread
-9.  KVM reprograms the timer hardware with the VCPU's values
-10. KVM marks the timer interrupt as active on the physical distributor
-11. KVM injects a forwarded physical interrupt to the guest
-12. KVM runs the VCPU
-
-Notice that KVM injects a forwarded physical interrupt in step 11 without
-the corresponding interrupt having actually fired on the host.  That is
-exactly why we mark the timer interrupt as active in step 10, because
-the active state on the physical distributor is part of the state
-belonging to the timer hardware, which is context-switched along with
-the VCPU thread.
-
-If the guest does not idle because it is busy, the flow looks like this
-instead:
-
-1.  KVM runs the VCPU
-2.  The guest programs the timer to fire at T+100
-3.  At T+100 the timer fires and a physical IRQ causes the VM to exit
-    (note that this initially only traps to EL2 and does not run the host ISR
-    until KVM has returned to the host).
-4.  With interrupts still disabled on the CPU coming back from the guest, KVM
-    stores the virtual timer state to memory and disables the virtual hw timer.
-5.  KVM looks at the timer state (in memory) and injects a forwarded physical
-    interrupt because it concludes the timer has expired.
-6.  KVM marks the timer interrupt as active on the physical distributor
-7.  KVM enables the timer, enables interrupts, and runs the VCPU
-
-Notice that again the forwarded physical interrupt is injected to the
-guest without having actually been handled on the host.  In this case it
-is because the physical interrupt is never actually seen by the host, as the
-timer is disabled upon guest return, and the virtual forwarded interrupt is
-injected on the KVM guest entry path.
index 3d22eb87f919a81ce530baeee88247c75521a4ef..9003bd19cb701852184a7f3ca0edb7743fa47eea 100644 (file)
@@ -131,7 +131,7 @@ static inline bool mode_has_spsr(struct kvm_vcpu *vcpu)
 static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
 {
        unsigned long cpsr_mode = vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr & MODE_MASK;
-       return cpsr_mode > USR_MODE;;
+       return cpsr_mode > USR_MODE;
 }
 
 static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
index a9f7d3f47134a96536480275168247e309d78388..6394fb99da7f0bbe152fe7997034ba68f9e9473b 100644 (file)
@@ -48,6 +48,8 @@
        KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_IRQ_PENDING    KVM_ARCH_REQ(1)
 
+DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
+
 u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
 int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
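
For orientation, the userspace_irqchip_in_use symbol declared above is a standard <linux/jump_label.h> static key; its definition and the inc/dec sites appear in the virt/kvm/arm/arm.c hunk further down this page, and hot paths test it with static_branch_unlikely() so the check costs nothing while no userspace-irqchip VM exists. The sketch below condenses that wiring; the two function names are illustrative placeholders for the real call sites (kvm_vcpu_first_run_init() and the vcpu run loop), not new kernel APIs.

#include <linux/jump_label.h>
#include <linux/kvm_host.h>
#include <kvm/arm_arch_timer.h>

/* False by default: the guarded branches stay patched out until the first
 * VM with a userspace irqchip is created (see the arm.c hunk below). */
DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);

/* Placeholder for kvm_vcpu_first_run_init(): count userspace-irqchip VMs. */
static void note_userspace_irqchip(struct kvm *kvm)
{
	if (!irqchip_in_kernel(kvm))
		static_branch_inc(&userspace_irqchip_in_use);
}

/* Placeholder for the run-loop check: effectively free while the key is off. */
static void maybe_sync_timer(struct kvm_vcpu *vcpu)
{
	if (static_branch_unlikely(&userspace_irqchip_in_use))
		kvm_timer_sync_hwstate(vcpu);
}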
index ab20ffa8b9e7658bc73779b7b6ae711f140d2105..1ab8329e9ff75cbb617523af3e7150a3b38ba7ce 100644 (file)
@@ -21,7 +21,6 @@
 #include <linux/compiler.h>
 #include <linux/kvm_host.h>
 #include <asm/cp15.h>
-#include <asm/kvm_mmu.h>
 #include <asm/vfp.h>
 
 #define __hyp_text __section(.hyp.text) notrace
@@ -69,6 +68,8 @@
 #define HIFAR          __ACCESS_CP15(c6, 4, c0, 2)
 #define HPFAR          __ACCESS_CP15(c6, 4, c0, 4)
 #define ICIALLUIS      __ACCESS_CP15(c7, 0, c1, 0)
+#define BPIALLIS       __ACCESS_CP15(c7, 0, c1, 6)
+#define ICIMVAU                __ACCESS_CP15(c7, 0, c5, 1)
 #define ATS1CPR                __ACCESS_CP15(c7, 0, c8, 0)
 #define TLBIALLIS      __ACCESS_CP15(c8, 0, c3, 0)
 #define TLBIALL                __ACCESS_CP15(c8, 0, c7, 0)
index fa6f2174276bdd665519a2cafcaf737e3fda2ce5..bc70a1f0f42d4c0abf2d0806a52b200aa94f65a8 100644 (file)
@@ -37,6 +37,8 @@
 
 #include <linux/highmem.h>
 #include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/kvm_hyp.h>
 #include <asm/pgalloc.h>
 #include <asm/stage2_pgtable.h>
 
@@ -83,6 +85,18 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
        return pmd;
 }
 
+static inline pte_t kvm_s2pte_mkexec(pte_t pte)
+{
+       pte_val(pte) &= ~L_PTE_XN;
+       return pte;
+}
+
+static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
+{
+       pmd_val(pmd) &= ~PMD_SECT_XN;
+       return pmd;
+}
+
 static inline void kvm_set_s2pte_readonly(pte_t *pte)
 {
        pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY;
@@ -93,6 +107,11 @@ static inline bool kvm_s2pte_readonly(pte_t *pte)
        return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY;
 }
 
+static inline bool kvm_s2pte_exec(pte_t *pte)
+{
+       return !(pte_val(*pte) & L_PTE_XN);
+}
+
 static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
 {
        pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY;
@@ -103,6 +122,11 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
        return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY;
 }
 
+static inline bool kvm_s2pmd_exec(pmd_t *pmd)
+{
+       return !(pmd_val(*pmd) & PMD_SECT_XN);
+}
+
 static inline bool kvm_page_empty(void *ptr)
 {
        struct page *ptr_page = virt_to_page(ptr);
@@ -126,10 +150,36 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
        return (vcpu_cp15(vcpu, c1_SCTLR) & 0b101) == 0b101;
 }
 
-static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
-                                              kvm_pfn_t pfn,
-                                              unsigned long size)
+static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
+{
+       /*
+        * Clean the dcache to the Point of Coherency.
+        *
+        * We need to do this through a kernel mapping (using the
+        * user-space mapping has proved to be the wrong
+        * solution). For that, we need to kmap one page at a time,
+        * and iterate over the range.
+        */
+
+       VM_BUG_ON(size & ~PAGE_MASK);
+
+       while (size) {
+               void *va = kmap_atomic_pfn(pfn);
+
+               kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+
+               size -= PAGE_SIZE;
+               pfn++;
+
+               kunmap_atomic(va);
+       }
+}
+
+static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
+                                                 unsigned long size)
 {
+       u32 iclsz;
+
        /*
         * If we are going to insert an instruction page and the icache is
         * either VIPT or PIPT, there is a potential problem where the host
@@ -141,23 +191,40 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
         *
         * VIVT caches are tagged using both the ASID and the VMID and doesn't
         * need any kind of flushing (DDI 0406C.b - Page B3-1392).
-        *
-        * We need to do this through a kernel mapping (using the
-        * user-space mapping has proved to be the wrong
-        * solution). For that, we need to kmap one page at a time,
-        * and iterate over the range.
         */
 
        VM_BUG_ON(size & ~PAGE_MASK);
 
+       if (icache_is_vivt_asid_tagged())
+               return;
+
+       if (!icache_is_pipt()) {
+               /* any kind of VIPT cache */
+               __flush_icache_all();
+               return;
+       }
+
+       /*
+        * CTR IminLine contains Log2 of the number of words in the
+        * cache line, so we can get the number of words as
+        * 2 << (IminLine - 1).  To get the number of bytes, we
+        * multiply by 4 (the number of bytes in a 32-bit word), and
+        * get 4 << (IminLine).
+        */
+       iclsz = 4 << (read_cpuid(CPUID_CACHETYPE) & 0xf);
+
        while (size) {
                void *va = kmap_atomic_pfn(pfn);
+               void *end = va + PAGE_SIZE;
+               void *addr = va;
 
-               kvm_flush_dcache_to_poc(va, PAGE_SIZE);
+               do {
+                       write_sysreg(addr, ICIMVAU);
+                       addr += iclsz;
+               } while (addr < end);
 
-               if (icache_is_pipt())
-                       __cpuc_coherent_user_range((unsigned long)va,
-                                                  (unsigned long)va + PAGE_SIZE);
+               dsb(ishst);
+               isb();
 
                size -= PAGE_SIZE;
                pfn++;
@@ -165,9 +232,11 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
                kunmap_atomic(va);
        }
 
-       if (!icache_is_pipt() && !icache_is_vivt_asid_tagged()) {
-               /* any kind of VIPT cache */
-               __flush_icache_all();
+       /* Check if we need to invalidate the BTB */
+       if ((read_cpuid_ext(CPUID_EXT_MMFR1) >> 28) != 4) {
+               write_sysreg(0, BPIALLIS);
+               dsb(ishst);
+               isb();
        }
 }
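
As a worked example of the IminLine arithmetic in the new comment above: a CPU reporting CTR.IminLine = 4 has 2 << 3 = 16 words, i.e. 4 << 4 = 64 bytes, per I-cache line, so the ICIMVAU loop issues 4096 / 64 = 64 invalidate operations for each 4 KiB page.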
 
index 150ece66ddf34506cf8d36963c2461a8188ebe91..a757401129f9567cbdebea5249b60e7e9a117e87 100644 (file)
@@ -102,8 +102,8 @@ extern pgprot_t             pgprot_s2_device;
 #define PAGE_HYP_EXEC          _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY)
 #define PAGE_HYP_RO            _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY | L_PTE_XN)
 #define PAGE_HYP_DEVICE                _MOD_PROT(pgprot_hyp_device, L_PTE_HYP)
-#define PAGE_S2                        _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY)
-#define PAGE_S2_DEVICE         _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY)
+#define PAGE_S2                        _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY | L_PTE_XN)
+#define PAGE_S2_DEVICE         _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY | L_PTE_XN)
 
 #define __PAGE_NONE            __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE)
 #define __PAGE_SHARED          __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
index 330c9ce34ba5f68ceb7907d9a41415d598fd0b0a..ae45ae96aac28bbd4395865c83d59664cead4838 100644 (file)
@@ -18,6 +18,7 @@
 
 #include <asm/kvm_asm.h>
 #include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
 
 __asm__(".arch_extension     virt");
 
index 6d810af2d9fd7c630603ee5bfa8108c42a9992d8..c0edd450e10459612e37cc292ad8585494d12773 100644 (file)
@@ -19,6 +19,7 @@
  */
 
 #include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
 
 /**
  * Flush per-VMID TLBs
index b3da6c886835964a23288972e5d992b2aa87330d..03064261ee0bb69c20663341487783efb07da22e 100644 (file)
        isb
        .endm
 
-       .macro  uaccess_ttbr0_disable, tmp1
+       .macro  uaccess_ttbr0_disable, tmp1, tmp2
 alternative_if_not ARM64_HAS_PAN
        __uaccess_ttbr0_disable \tmp1
 alternative_else_nop_endif
        .endm
 
-       .macro  uaccess_ttbr0_enable, tmp1, tmp2
+       .macro  uaccess_ttbr0_enable, tmp1, tmp2, tmp3
 alternative_if_not ARM64_HAS_PAN
        save_and_disable_irq \tmp2              // avoid preemption
        __uaccess_ttbr0_enable \tmp1
@@ -39,18 +39,18 @@ alternative_if_not ARM64_HAS_PAN
 alternative_else_nop_endif
        .endm
 #else
-       .macro  uaccess_ttbr0_disable, tmp1
+       .macro  uaccess_ttbr0_disable, tmp1, tmp2
        .endm
 
-       .macro  uaccess_ttbr0_enable, tmp1, tmp2
+       .macro  uaccess_ttbr0_enable, tmp1, tmp2, tmp3
        .endm
 #endif
 
 /*
  * These macros are no-ops when UAO is present.
  */
-       .macro  uaccess_disable_not_uao, tmp1
-       uaccess_ttbr0_disable \tmp1
+       .macro  uaccess_disable_not_uao, tmp1, tmp2
+       uaccess_ttbr0_disable \tmp1, \tmp2
 alternative_if ARM64_ALT_PAN_NOT_UAO
        SET_PSTATE_PAN(1)
 alternative_else_nop_endif
index aef72d886677758c76d6b932c863893df7c67b53..0884e1fdfd30321344a86d1825b49a4aa2e841a3 100644 (file)
@@ -387,6 +387,27 @@ alternative_endif
        dsb     \domain
        .endm
 
+/*
+ * Macro to perform an instruction cache maintenance for the interval
+ * [start, end)
+ *
+ *     start, end:     virtual addresses describing the region
+ *     label:          A label to branch to on user fault.
+ *     Corrupts:       tmp1, tmp2
+ */
+       .macro invalidate_icache_by_line start, end, tmp1, tmp2, label
+       icache_line_size \tmp1, \tmp2
+       sub     \tmp2, \tmp1, #1
+       bic     \tmp2, \start, \tmp2
+9997:
+USER(\label, ic        ivau, \tmp2)                    // invalidate I line PoU
+       add     \tmp2, \tmp2, \tmp1
+       cmp     \tmp2, \end
+       b.lo    9997b
+       dsb     ish
+       isb
+       .endm
+
 /*
  * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
  */
index 955130762a3c6acc09f3ee76574f7cefc5097b4c..bef9f418f08986830e68f6bc2c41c88eb5333a29 100644 (file)
  *             - start  - virtual start address
  *             - end    - virtual end address
  *
+ *     invalidate_icache_range(start, end)
+ *
+ *             Invalidate the I-cache in the region described by start, end.
+ *             - start  - virtual start address
+ *             - end    - virtual end address
+ *
  *     __flush_cache_user_range(start, end)
  *
  *             Ensure coherency between the I-cache and the D-cache in the
@@ -66,6 +72,7 @@
  *             - size   - region size
  */
 extern void flush_icache_range(unsigned long start, unsigned long end);
+extern int  invalidate_icache_range(unsigned long start, unsigned long end);
 extern void __flush_dcache_area(void *addr, size_t len);
 extern void __inval_dcache_area(void *addr, size_t len);
 extern void __clean_dcache_area_poc(void *addr, size_t len);
index ea6cb5b24258be29f39507c39d526ea7e52c7c30..e7218cf7df2a2cc1d86a5eaca88c1fdc0fbad878 100644 (file)
@@ -47,6 +47,8 @@
        KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_IRQ_PENDING    KVM_ARCH_REQ(1)
 
+DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
+
 int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext);
index 08d3bb66c8b75be2a39a1fa35946126db07cf479..f26f9cd70c721a5ae078c6294eae15ede20a8708 100644 (file)
@@ -20,7 +20,6 @@
 
 #include <linux/compiler.h>
 #include <linux/kvm_host.h>
-#include <asm/kvm_mmu.h>
 #include <asm/sysreg.h>
 
 #define __hyp_text __section(.hyp.text) notrace
index 672c8684d5c2a796fadae762846c1f314016c7c3..06f1f979467996fd15727f10fb92aa4f67c8b377 100644 (file)
@@ -173,6 +173,18 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
        return pmd;
 }
 
+static inline pte_t kvm_s2pte_mkexec(pte_t pte)
+{
+       pte_val(pte) &= ~PTE_S2_XN;
+       return pte;
+}
+
+static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
+{
+       pmd_val(pmd) &= ~PMD_S2_XN;
+       return pmd;
+}
+
 static inline void kvm_set_s2pte_readonly(pte_t *pte)
 {
        pteval_t old_pteval, pteval;
@@ -191,6 +203,11 @@ static inline bool kvm_s2pte_readonly(pte_t *pte)
        return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY;
 }
 
+static inline bool kvm_s2pte_exec(pte_t *pte)
+{
+       return !(pte_val(*pte) & PTE_S2_XN);
+}
+
 static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
 {
        kvm_set_s2pte_readonly((pte_t *)pmd);
@@ -201,6 +218,11 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
        return kvm_s2pte_readonly((pte_t *)pmd);
 }
 
+static inline bool kvm_s2pmd_exec(pmd_t *pmd)
+{
+       return !(pmd_val(*pmd) & PMD_S2_XN);
+}
+
 static inline bool kvm_page_empty(void *ptr)
 {
        struct page *ptr_page = virt_to_page(ptr);
@@ -230,21 +252,25 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
        return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
 }
 
-static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu,
-                                              kvm_pfn_t pfn,
-                                              unsigned long size)
+static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
 {
        void *va = page_address(pfn_to_page(pfn));
 
        kvm_flush_dcache_to_poc(va, size);
+}
 
+static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
+                                                 unsigned long size)
+{
        if (icache_is_aliasing()) {
                /* any kind of VIPT cache */
                __flush_icache_all();
        } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
                /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
-               flush_icache_range((unsigned long)va,
-                                  (unsigned long)va + size);
+               void *va = page_address(pfn_to_page(pfn));
+
+               invalidate_icache_range((unsigned long)va,
+                                       (unsigned long)va + size);
        }
 }
 
index eb0c2bd90de903469790a66c6d7f528af64c9d76..af035331fb096a029bbce74ff856cf95b9755cac 100644 (file)
  */
 #define PTE_S2_RDONLY          (_AT(pteval_t, 1) << 6)   /* HAP[2:1] */
 #define PTE_S2_RDWR            (_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
+#define PTE_S2_XN              (_AT(pteval_t, 2) << 53)  /* XN[1:0] */
 
 #define PMD_S2_RDONLY          (_AT(pmdval_t, 1) << 6)   /* HAP[2:1] */
 #define PMD_S2_RDWR            (_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
+#define PMD_S2_XN              (_AT(pmdval_t, 2) << 53)  /* XN[1:0] */
 
 /*
  * Memory Attribute override for Stage-2 (MemAttr[3:0])
index 0a5635fb0ef9843f0eac0a328e0c763d76bf4444..4e12dabd342b0676f75b25ff6fc2006f5382647e 100644 (file)
@@ -60,8 +60,8 @@
 #define PAGE_HYP_RO            __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
 #define PAGE_HYP_DEVICE                __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
 
-#define PAGE_S2                        __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
-#define PAGE_S2_DEVICE         __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
+#define PAGE_S2                        __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY | PTE_S2_XN)
+#define PAGE_S2_DEVICE         __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN)
 
 #define PAGE_NONE              __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
 #define PAGE_SHARED            __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
index 321c9c05dd9e09fc0c745a4543a286b7628f00a4..360455f863461aa56b981b2d52e4097aedf17873 100644 (file)
@@ -21,6 +21,7 @@
 #include <asm/debug-monitors.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
 
 #define read_debug(r,n)                read_sysreg(r##n##_el1)
 #define write_debug(v,r,n)     write_sysreg(v, r##n##_el1)
index f7c651f3a8c0e8001bb11b1a45216ac6d4a6b342..f3d8bed096f50a435d113a95a9f0f377cd5221a0 100644 (file)
@@ -21,6 +21,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
 #include <asm/fpsimd.h>
 #include <asm/debug-monitors.h>
 
index 73464a96c3657e41d8088644b14845503024446c..131c7772703c290c3cd27c1a539090928dfc8e73 100644 (file)
@@ -16,6 +16,7 @@
  */
 
 #include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
 #include <asm/tlbflush.h>
 
 static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm)
index e88fb99c15616397e6c4bbc0dd9c26e504688d97..8932e5f7a6f396cd6ae377acb881208a8d5fd668 100644 (file)
@@ -50,7 +50,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2
        b.mi    5f
 uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
 5:     mov     x0, #0
-       uaccess_disable_not_uao x2
+       uaccess_disable_not_uao x2, x3
        ret
 ENDPROC(__clear_user)
 
index 4b5d826895ff161347dec9dd48710d08bcc49699..bc108634992c1fd2d8065b183785f56c12a37756 100644 (file)
@@ -67,7 +67,7 @@ ENTRY(__arch_copy_from_user)
        uaccess_enable_not_uao x3, x4
        add     end, x0, x2
 #include "copy_template.S"
-       uaccess_disable_not_uao x3
+       uaccess_disable_not_uao x3, x4
        mov     x0, #0                          // Nothing to copy
        ret
 ENDPROC(__arch_copy_from_user)
index b24a830419ad95001a1b635ea169b20d0ea73d3a..e6dd59dd40534a491ce3a75d9b74a6739bb196d6 100644 (file)
@@ -68,7 +68,7 @@ ENTRY(raw_copy_in_user)
        uaccess_enable_not_uao x3, x4
        add     end, x0, x2
 #include "copy_template.S"
-       uaccess_disable_not_uao x3
+       uaccess_disable_not_uao x3, x4
        mov     x0, #0
        ret
 ENDPROC(raw_copy_in_user)
index 351f0766f7a61c54a47427c6a6845521fda6066d..bd20f9f7dd84259a1469f830aede41ca4db43b6f 100644 (file)
@@ -66,7 +66,7 @@ ENTRY(__arch_copy_to_user)
        uaccess_enable_not_uao x3, x4
        add     end, x0, x2
 #include "copy_template.S"
-       uaccess_disable_not_uao x3
+       uaccess_disable_not_uao x3, x4
        mov     x0, #0
        ret
 ENDPROC(__arch_copy_to_user)
index 7f1dbe962cf581c88eb488f01636f78996b4ee43..758bde7e2fa68a9d1a241857ad593b39a4d6e705 100644 (file)
@@ -49,7 +49,7 @@ ENTRY(flush_icache_range)
  *     - end     - virtual end address of region
  */
 ENTRY(__flush_cache_user_range)
-       uaccess_ttbr0_enable x2, x3
+       uaccess_ttbr0_enable x2, x3, x4
        dcache_line_size x2, x3
        sub     x3, x2, #1
        bic     x4, x0, x3
@@ -60,19 +60,10 @@ user_alt 9f, "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
        b.lo    1b
        dsb     ish
 
-       icache_line_size x2, x3
-       sub     x3, x2, #1
-       bic     x4, x0, x3
-1:
-USER(9f, ic    ivau, x4        )               // invalidate I line PoU
-       add     x4, x4, x2
-       cmp     x4, x1
-       b.lo    1b
-       dsb     ish
-       isb
+       invalidate_icache_by_line x0, x1, x2, x3, 9f
        mov     x0, #0
 1:
-       uaccess_ttbr0_disable x1
+       uaccess_ttbr0_disable x1, x2
        ret
 9:
        mov     x0, #-EFAULT
@@ -80,6 +71,27 @@ USER(9f, ic  ivau, x4        )               // invalidate I line PoU
 ENDPROC(flush_icache_range)
 ENDPROC(__flush_cache_user_range)
 
+/*
+ *     invalidate_icache_range(start,end)
+ *
+ *     Ensure that the I cache is invalid within specified region.
+ *
+ *     - start   - virtual start address of region
+ *     - end     - virtual end address of region
+ */
+ENTRY(invalidate_icache_range)
+       uaccess_ttbr0_enable x2, x3, x4
+
+       invalidate_icache_by_line x0, x1, x2, x3, 2f
+       mov     x0, xzr
+1:
+       uaccess_ttbr0_disable x1, x2
+       ret
+2:
+       mov     x0, #-EFAULT
+       b       1b
+ENDPROC(invalidate_icache_range)
+
 /*
  *     __flush_dcache_area(kaddr, size)
  *
index 401ceb71540c7440256fe7e6f7d71bbf982f529f..c5f05c4a4d00883422ed6e211135302cff3be14f 100644 (file)
@@ -101,12 +101,12 @@ ENTRY(privcmd_call)
         * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation
         * is enabled (it implies that hardware UAO and PAN disabled).
         */
-       uaccess_ttbr0_enable x6, x7
+       uaccess_ttbr0_enable x6, x7, x8
        hvc XEN_IMM
 
        /*
         * Disable userspace access from kernel once the hyp call completed.
         */
-       uaccess_ttbr0_disable x6
+       uaccess_ttbr0_disable x6, x7
        ret
 ENDPROC(privcmd_call);
index 6e45608b2399813329e2280e601c2465940def53..b1dcfde0a3ef161681152f55f7a49ee5d90dde53 100644 (file)
@@ -90,6 +90,8 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu);
 
 void kvm_timer_init_vhe(void);
 
+bool kvm_arch_timer_get_input_level(int vintid);
+
 #define vcpu_vtimer(v) (&(v)->arch.timer_cpu.vtimer)
 #define vcpu_ptimer(v) (&(v)->arch.timer_cpu.ptimer)
 
index 8c896540a72cf4e933556627fa04bce0bf1d2ce3..cdbd142ca7f2ea4513a7bcf44f877f9b3bbbebc3 100644 (file)
@@ -130,6 +130,17 @@ struct vgic_irq {
        u8 priority;
        enum vgic_irq_config config;    /* Level or edge */
 
+       /*
+        * Callback function pointer to in-kernel devices that can tell us the
+        * state of the input level of mapped level-triggered IRQ faster than
+        * peeking into the physical GIC.

+        *
+        * Always called in non-preemptible section and the functions can use
+        * kvm_arm_get_running_vcpu() to get the vcpu pointer for private
+        * IRQs.
+        */
+       bool (*get_input_level)(int vintid);
+
        void *owner;                    /* Opaque pointer to reserve an interrupt
                                           for in-kernel devices. */
 };
@@ -331,7 +342,7 @@ void kvm_vgic_init_cpu_hardware(void);
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
                        bool level, void *owner);
 int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
-                         u32 vintid);
+                         u32 vintid, bool (*get_input_level)(int vindid));
 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid);
 bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid);
 
index f9555b1e7f158f5203c1aaba47002424d3279203..fb6bd9b9845ea741f5e06208c16a6239a18a7426 100644 (file)
@@ -97,15 +97,13 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
                pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n");
                return IRQ_NONE;
        }
-       vtimer = vcpu_vtimer(vcpu);
 
-       if (!vtimer->irq.level) {
-               vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
-               if (kvm_timer_irq_can_fire(vtimer))
-                       kvm_timer_update_irq(vcpu, true, vtimer);
-       }
+       vtimer = vcpu_vtimer(vcpu);
+       if (kvm_timer_should_fire(vtimer))
+               kvm_timer_update_irq(vcpu, true, vtimer);
 
-       if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+       if (static_branch_unlikely(&userspace_irqchip_in_use) &&
+           unlikely(!irqchip_in_kernel(vcpu->kvm)))
                kvm_vtimer_update_mask_user(vcpu);
 
        return IRQ_HANDLED;
@@ -231,6 +229,16 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
 {
        u64 cval, now;
 
+       if (timer_ctx->loaded) {
+               u32 cnt_ctl;
+
+               /* Only the virtual timer can be loaded so far */
+               cnt_ctl = read_sysreg_el0(cntv_ctl);
+               return  (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
+                       (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
+                      !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
+       }
+
        if (!kvm_timer_irq_can_fire(timer_ctx))
                return false;
 
@@ -245,15 +253,7 @@ bool kvm_timer_is_pending(struct kvm_vcpu *vcpu)
        struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
        struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
-       if (vtimer->irq.level || ptimer->irq.level)
-               return true;
-
-       /*
-        * When this is called from withing the wait loop of kvm_vcpu_block(),
-        * the software view of the timer state is up to date (timer->loaded
-        * is false), and so we can simply check if the timer should fire now.
-        */
-       if (!vtimer->loaded && kvm_timer_should_fire(vtimer))
+       if (kvm_timer_should_fire(vtimer))
                return true;
 
        return kvm_timer_should_fire(ptimer);
@@ -271,9 +271,9 @@ void kvm_timer_update_run(struct kvm_vcpu *vcpu)
        /* Populate the device bitmap with the timer states */
        regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
                                    KVM_ARM_DEV_EL1_PTIMER);
-       if (vtimer->irq.level)
+       if (kvm_timer_should_fire(vtimer))
                regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
-       if (ptimer->irq.level)
+       if (kvm_timer_should_fire(ptimer))
                regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
 }
 
@@ -286,7 +286,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
        trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq,
                                   timer_ctx->irq.level);
 
-       if (likely(irqchip_in_kernel(vcpu->kvm))) {
+       if (!static_branch_unlikely(&userspace_irqchip_in_use) ||
+           likely(irqchip_in_kernel(vcpu->kvm))) {
                ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
                                          timer_ctx->irq.irq,
                                          timer_ctx->irq.level,
@@ -324,12 +325,20 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
        struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
        struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+       bool level;
 
        if (unlikely(!timer->enabled))
                return;
 
-       if (kvm_timer_should_fire(vtimer) != vtimer->irq.level)
-               kvm_timer_update_irq(vcpu, !vtimer->irq.level, vtimer);
+       /*
+        * The vtimer virtual interrupt is a 'mapped' interrupt, meaning part
+        * of its lifecycle is offloaded to the hardware, and we therefore may
+        * not have lowered the irq.level value before having to signal a new
+        * interrupt, but have to signal an interrupt every time the level is
+        * asserted.
+        */
+       level = kvm_timer_should_fire(vtimer);
+       kvm_timer_update_irq(vcpu, level, vtimer);
 
        if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
                kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
@@ -337,6 +346,12 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
        phys_timer_emulate(vcpu);
 }
 
+static void __timer_snapshot_state(struct arch_timer_context *timer)
+{
+       timer->cnt_ctl = read_sysreg_el0(cntv_ctl);
+       timer->cnt_cval = read_sysreg_el0(cntv_cval);
+}
+
 static void vtimer_save_state(struct kvm_vcpu *vcpu)
 {
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -348,10 +363,8 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
        if (!vtimer->loaded)
                goto out;
 
-       if (timer->enabled) {
-               vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
-               vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
-       }
+       if (timer->enabled)
+               __timer_snapshot_state(vtimer);
 
        /* Disable the virtual timer */
        write_sysreg_el0(0, cntv_ctl);
@@ -448,8 +461,7 @@ static void kvm_timer_vcpu_load_vgic(struct kvm_vcpu *vcpu)
        bool phys_active;
        int ret;
 
-       phys_active = vtimer->irq.level ||
-                     kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
+       phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
 
        ret = irq_set_irqchip_state(host_vtimer_irq,
                                    IRQCHIP_STATE_ACTIVE,
@@ -496,8 +508,8 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
        vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
        plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;
 
-       return vtimer->irq.level != vlevel ||
-              ptimer->irq.level != plevel;
+       return kvm_timer_should_fire(vtimer) != vlevel ||
+              kvm_timer_should_fire(ptimer) != plevel;
 }
 
 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
@@ -529,54 +541,27 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
        set_cntvoff(0);
 }
 
-static void unmask_vtimer_irq(struct kvm_vcpu *vcpu)
+/*
+ * With a userspace irqchip we have to check if the guest de-asserted the
+ * timer and if so, unmask the timer irq signal on the host interrupt
+ * controller to ensure that we see future timer signals.
+ */
+static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
 {
        struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 
        if (unlikely(!irqchip_in_kernel(vcpu->kvm))) {
-               kvm_vtimer_update_mask_user(vcpu);
-               return;
-       }
-
-       /*
-        * If the guest disabled the timer without acking the interrupt, then
-        * we must make sure the physical and virtual active states are in
-        * sync by deactivating the physical interrupt, because otherwise we
-        * wouldn't see the next timer interrupt in the host.
-        */
-       if (!kvm_vgic_map_is_active(vcpu, vtimer->irq.irq)) {
-               int ret;
-               ret = irq_set_irqchip_state(host_vtimer_irq,
-                                           IRQCHIP_STATE_ACTIVE,
-                                           false);
-               WARN_ON(ret);
+               __timer_snapshot_state(vtimer);
+               if (!kvm_timer_should_fire(vtimer)) {
+                       kvm_timer_update_irq(vcpu, false, vtimer);
+                       kvm_vtimer_update_mask_user(vcpu);
+               }
        }
 }
 
-/**
- * kvm_timer_sync_hwstate - sync timer state from cpu
- * @vcpu: The vcpu pointer
- *
- * Check if any of the timers have expired while we were running in the guest,
- * and inject an interrupt if that was the case.
- */
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 {
-       struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-
-       /*
-        * If we entered the guest with the vtimer output asserted we have to
-        * check if the guest has modified the timer so that we should lower
-        * the line at this point.
-        */
-       if (vtimer->irq.level) {
-               vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
-               vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
-               if (!kvm_timer_should_fire(vtimer)) {
-                       kvm_timer_update_irq(vcpu, false, vtimer);
-                       unmask_vtimer_irq(vcpu);
-               }
-       }
+       unmask_vtimer_irq_user(vcpu);
 }
 
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -807,6 +792,19 @@ static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
        return true;
 }
 
+bool kvm_arch_timer_get_input_level(int vintid)
+{
+       struct kvm_vcpu *vcpu = kvm_arm_get_running_vcpu();
+       struct arch_timer_context *timer;
+
+       if (vintid == vcpu_vtimer(vcpu)->irq.irq)
+               timer = vcpu_vtimer(vcpu);
+       else
+               BUG(); /* We only map the vtimer so far */
+
+       return kvm_timer_should_fire(timer);
+}
+
 int kvm_timer_enable(struct kvm_vcpu *vcpu)
 {
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -828,7 +826,8 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
                return -EINVAL;
        }
 
-       ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq);
+       ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq,
+                                   kvm_arch_timer_get_input_level);
        if (ret)
                return ret;
 
index cd7d90c9f644d18743020672afbbc7ae06dcbbb9..92b95ae9a2ca04b5a474201e0908d265973b1959 100644 (file)
@@ -71,17 +71,17 @@ static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
 
 static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
 {
-       BUG_ON(preemptible());
        __this_cpu_write(kvm_arm_running_vcpu, vcpu);
 }
 
+DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
+
 /**
  * kvm_arm_get_running_vcpu - get the vcpu running on the current CPU.
  * Must be called from non-preemptible context
  */
 struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
 {
-       BUG_ON(preemptible());
        return __this_cpu_read(kvm_arm_running_vcpu);
 }
 
@@ -295,6 +295,9 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
+       if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm)))
+               static_branch_dec(&userspace_irqchip_in_use);
+
        kvm_mmu_free_memory_caches(vcpu);
        kvm_timer_vcpu_terminate(vcpu);
        kvm_pmu_vcpu_destroy(vcpu);
@@ -532,14 +535,22 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 
        vcpu->arch.has_run_once = true;
 
-       /*
-        * Map the VGIC hardware resources before running a vcpu the first
-        * time on this VM.
-        */
-       if (unlikely(irqchip_in_kernel(kvm) && !vgic_ready(kvm))) {
-               ret = kvm_vgic_map_resources(kvm);
-               if (ret)
-                       return ret;
+       if (likely(irqchip_in_kernel(kvm))) {
+               /*
+                * Map the VGIC hardware resources before running a vcpu the
+                * first time on this VM.
+                */
+               if (unlikely(!vgic_ready(kvm))) {
+                       ret = kvm_vgic_map_resources(kvm);
+                       if (ret)
+                               return ret;
+               }
+       } else {
+               /*
+                * Tell the rest of the code that there are userspace irqchip
+                * VMs in the wild.
+                */
+               static_branch_inc(&userspace_irqchip_in_use);
        }
 
        ret = kvm_timer_enable(vcpu);
@@ -680,18 +691,29 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
                kvm_vgic_flush_hwstate(vcpu);
 
                /*
-                * If we have a singal pending, or need to notify a userspace
-                * irqchip about timer or PMU level changes, then we exit (and
-                * update the timer level state in kvm_timer_update_run
-                * below).
+                * Exit if we have a signal pending so that we can deliver the
+                * signal to user space.
                 */
-               if (signal_pending(current) ||
-                   kvm_timer_should_notify_user(vcpu) ||
-                   kvm_pmu_should_notify_user(vcpu)) {
+               if (signal_pending(current)) {
                        ret = -EINTR;
                        run->exit_reason = KVM_EXIT_INTR;
                }
 
+               /*
+                * If we're using a userspace irqchip, then check if we need
+                * to tell a userspace irqchip about timer or PMU level
+                * changes and if so, exit to userspace (the actual level
+                * state gets updated in kvm_timer_update_run and
+                * kvm_pmu_update_run below).
+                */
+               if (static_branch_unlikely(&userspace_irqchip_in_use)) {
+                       if (kvm_timer_should_notify_user(vcpu) ||
+                           kvm_pmu_should_notify_user(vcpu)) {
+                               ret = -EINTR;
+                               run->exit_reason = KVM_EXIT_INTR;
+                       }
+               }
+
                /*
                 * Ensure we set mode to IN_GUEST_MODE after we disable
                 * interrupts and before the final VCPU requests check.
@@ -704,7 +726,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
                    kvm_request_pending(vcpu)) {
                        vcpu->mode = OUTSIDE_GUEST_MODE;
                        kvm_pmu_sync_hwstate(vcpu);
-                       kvm_timer_sync_hwstate(vcpu);
+                       if (static_branch_unlikely(&userspace_irqchip_in_use))
+                               kvm_timer_sync_hwstate(vcpu);
                        kvm_vgic_sync_hwstate(vcpu);
                        local_irq_enable();
                        preempt_enable();
@@ -748,7 +771,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
                 * we don't want vtimer interrupts to race with syncing the
                 * timer virtual interrupt state.
                 */
-               kvm_timer_sync_hwstate(vcpu);
+               if (static_branch_unlikely(&userspace_irqchip_in_use))
+                       kvm_timer_sync_hwstate(vcpu);
 
                /*
                 * We may have taken a host interrupt in HYP mode (ie
@@ -1277,6 +1301,7 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
                        cpu_hyp_reset();
 
                return NOTIFY_OK;
+       case CPU_PM_ENTER_FAILED:
        case CPU_PM_EXIT:
                if (__this_cpu_read(kvm_arm_hardware_enabled))
                        /* The hardware was enabled before suspend. */
index d7fd46fe9efb35ca28a0685b333f8c68a61b2d64..4fe6e797e8b3c5f19b43049303d616175fe3af47 100644 (file)
@@ -21,6 +21,7 @@
 
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
 
 static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
 {
index b36945d49986dd5c0f097f16837d72d81f655308..a1ea43fa75cfd473932d8ee081fc1383dd3dac0a 100644 (file)
@@ -926,6 +926,25 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
        return 0;
 }
 
+static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr)
+{
+       pmd_t *pmdp;
+       pte_t *ptep;
+
+       pmdp = stage2_get_pmd(kvm, NULL, addr);
+       if (!pmdp || pmd_none(*pmdp) || !pmd_present(*pmdp))
+               return false;
+
+       if (pmd_thp_or_huge(*pmdp))
+               return kvm_s2pmd_exec(pmdp);
+
+       ptep = pte_offset_kernel(pmdp, addr);
+       if (!ptep || pte_none(*ptep) || !pte_present(*ptep))
+               return false;
+
+       return kvm_s2pte_exec(ptep);
+}
+
 static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
                          phys_addr_t addr, const pte_t *new_pte,
                          unsigned long flags)
@@ -1257,10 +1276,14 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
        kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
 }
 
-static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn,
-                                     unsigned long size)
+static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
 {
-       __coherent_cache_guest_page(vcpu, pfn, size);
+       __clean_dcache_guest_page(pfn, size);
+}
+
+static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
+{
+       __invalidate_icache_guest_page(pfn, size);
 }
 
 static void kvm_send_hwpoison_signal(unsigned long address,
@@ -1286,7 +1309,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                          unsigned long fault_status)
 {
        int ret;
-       bool write_fault, writable, hugetlb = false, force_pte = false;
+       bool write_fault, exec_fault, writable, hugetlb = false, force_pte = false;
        unsigned long mmu_seq;
        gfn_t gfn = fault_ipa >> PAGE_SHIFT;
        struct kvm *kvm = vcpu->kvm;
@@ -1298,7 +1321,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        unsigned long flags = 0;
 
        write_fault = kvm_is_write_fault(vcpu);
-       if (fault_status == FSC_PERM && !write_fault) {
+       exec_fault = kvm_vcpu_trap_is_iabt(vcpu);
+       VM_BUG_ON(write_fault && exec_fault);
+
+       if (fault_status == FSC_PERM && !write_fault && !exec_fault) {
                kvm_err("Unexpected L2 read permission error\n");
                return -EFAULT;
        }
@@ -1391,7 +1417,19 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                        new_pmd = kvm_s2pmd_mkwrite(new_pmd);
                        kvm_set_pfn_dirty(pfn);
                }
-               coherent_cache_guest_page(vcpu, pfn, PMD_SIZE);
+
+               if (fault_status != FSC_PERM)
+                       clean_dcache_guest_page(pfn, PMD_SIZE);
+
+               if (exec_fault) {
+                       new_pmd = kvm_s2pmd_mkexec(new_pmd);
+                       invalidate_icache_guest_page(pfn, PMD_SIZE);
+               } else if (fault_status == FSC_PERM) {
+                       /* Preserve execute if XN was already cleared */
+                       if (stage2_is_exec(kvm, fault_ipa))
+                               new_pmd = kvm_s2pmd_mkexec(new_pmd);
+               }
+
                ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
        } else {
                pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -1401,7 +1439,19 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                        kvm_set_pfn_dirty(pfn);
                        mark_page_dirty(kvm, gfn);
                }
-               coherent_cache_guest_page(vcpu, pfn, PAGE_SIZE);
+
+               if (fault_status != FSC_PERM)
+                       clean_dcache_guest_page(pfn, PAGE_SIZE);
+
+               if (exec_fault) {
+                       new_pte = kvm_s2pte_mkexec(new_pte);
+                       invalidate_icache_guest_page(pfn, PAGE_SIZE);
+               } else if (fault_status == FSC_PERM) {
+                       /* Preserve execute if XN was already cleared */
+                       if (stage2_is_exec(kvm, fault_ipa))
+                               new_pte = kvm_s2pte_mkexec(new_pte);
+               }
+
                ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags);
        }
 
index 8e633bd9cc1e74706e0490419f27a20e6fa8e0b7..465095355666ec46246c29192127a6ec464f4472 100644 (file)
@@ -1034,10 +1034,8 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
 
        device = vgic_its_alloc_device(its, device_id, itt_addr,
                                       num_eventid_bits);
-       if (IS_ERR(device))
-               return PTR_ERR(device);
 
-       return 0;
+       return PTR_ERR_OR_ZERO(device);
 }
 
 /*
index deb51ee16a3da478401022699b5bbaff85b27702..83d82bd7dc4e714f61669e6aaf9e937b67f5eea1 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <kvm/iodev.h>
+#include <kvm/arm_arch_timer.h>
 #include <kvm/arm_vgic.h>
 
 #include "vgic.h"
@@ -122,10 +123,43 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
        return value;
 }
 
+/*
+ * This function will return the VCPU that performed the MMIO access and
+ * trapped from within the VM, and will return NULL if this is a userspace
+ * access.
+ *
+ * We can disable preemption locally around accessing the per-CPU variable,
+ * and use the resolved vcpu pointer after enabling preemption again, because
+ * even if the current thread is migrated to another CPU, reading the per-CPU
+ * value later will give us the same value as we update the per-CPU variable
+ * in the preempt notifier handlers.
+ */
+static struct kvm_vcpu *vgic_get_mmio_requester_vcpu(void)
+{
+       struct kvm_vcpu *vcpu;
+
+       preempt_disable();
+       vcpu = kvm_arm_get_running_vcpu();
+       preempt_enable();
+       return vcpu;
+}
+
+/* Must be called with irq->irq_lock held */
+static void vgic_hw_irq_spending(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
+                                bool is_uaccess)
+{
+       if (is_uaccess)
+               return;
+
+       irq->pending_latch = true;
+       vgic_irq_set_phys_active(irq, true);
+}
+
 void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
                              gpa_t addr, unsigned int len,
                              unsigned long val)
 {
+       bool is_uaccess = !vgic_get_mmio_requester_vcpu();
        u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
        int i;
        unsigned long flags;
@@ -134,17 +168,45 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
                struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
 
                spin_lock_irqsave(&irq->irq_lock, flags);
-               irq->pending_latch = true;
-
+               if (irq->hw)
+                       vgic_hw_irq_spending(vcpu, irq, is_uaccess);
+               else
+                       irq->pending_latch = true;
                vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
                vgic_put_irq(vcpu->kvm, irq);
        }
 }
 
+/* Must be called with irq->irq_lock held */
+static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
+                                bool is_uaccess)
+{
+       if (is_uaccess)
+               return;
+
+       irq->pending_latch = false;
+
+       /*
+        * We don't want the guest to effectively mask the physical
+        * interrupt by doing a write to ISPENDR followed by a write to
+        * ICPENDR for HW interrupts, so we clear the active state on
+        * the physical side if the virtual interrupt is not active.
+        * This may lead to taking an additional interrupt on the
+        * host, but that should not be a problem as the worst that
+        * can happen is an additional vgic injection.  We also clear
+        * the pending state to maintain proper semantics for edge HW
+        * interrupts.
+        */
+       vgic_irq_set_phys_pending(irq, false);
+       if (!irq->active)
+               vgic_irq_set_phys_active(irq, false);
+}
+
 void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
                              gpa_t addr, unsigned int len,
                              unsigned long val)
 {
+       bool is_uaccess = !vgic_get_mmio_requester_vcpu();
        u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
        int i;
        unsigned long flags;
@@ -154,7 +216,10 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
 
                spin_lock_irqsave(&irq->irq_lock, flags);
 
-               irq->pending_latch = false;
+               if (irq->hw)
+                       vgic_hw_irq_cpending(vcpu, irq, is_uaccess);
+               else
+                       irq->pending_latch = false;
 
                spin_unlock_irqrestore(&irq->irq_lock, flags);
                vgic_put_irq(vcpu->kvm, irq);
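For a forwarded interrupt, the pair of helpers above keeps the physical distributor consistent with the guest's view: a guest write to GICD_ISPENDR marks the physical interrupt active (so the host does not take it again while the virtual one is in flight), and a guest write to GICD_ICPENDR has to undo that, or the line would stay masked at the distributor. Expressed directly in terms of the irqchip-state API that the vgic_irq_set_phys_* helpers added in vgic.c further down wrap (sketch only, not part of the patch), vgic_hw_irq_cpending() amounts to:

        /* clear the physical pending state ... */
        irq_set_irqchip_state(irq->host_irq, IRQCHIP_STATE_PENDING, false);

        /* ... and, if the virtual IRQ is not active, deactivate the
         * physical interrupt as well so the device can raise it again */
        if (!irq->active)
                irq_set_irqchip_state(irq->host_irq, IRQCHIP_STATE_ACTIVE, false);

A userspace (save/restore) write takes the early-return path instead and leaves the physical state untouched.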
@@ -181,27 +246,24 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
        return value;
 }
 
+/* Must be called with irq->irq_lock held */
+static void vgic_hw_irq_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
+                                     bool active, bool is_uaccess)
+{
+       if (is_uaccess)
+               return;
+
+       irq->active = active;
+       vgic_irq_set_phys_active(irq, active);
+}
+
 static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
-                                   bool new_active_state)
+                                   bool active)
 {
-       struct kvm_vcpu *requester_vcpu;
        unsigned long flags;
-       spin_lock_irqsave(&irq->irq_lock, flags);
+       struct kvm_vcpu *requester_vcpu = vgic_get_mmio_requester_vcpu();
 
-       /*
-        * The vcpu parameter here can mean multiple things depending on how
-        * this function is called; when handling a trap from the kernel it
-        * depends on the GIC version, and these functions are also called as
-        * part of save/restore from userspace.
-        *
-        * Therefore, we have to figure out the requester in a reliable way.
-        *
-        * When accessing VGIC state from user space, the requester_vcpu is
-        * NULL, which is fine, because we guarantee that no VCPUs are running
-        * when accessing VGIC state from user space so irq->vcpu->cpu is
-        * always -1.
-        */
-       requester_vcpu = kvm_arm_get_running_vcpu();
+       spin_lock_irqsave(&irq->irq_lock, flags);
 
        /*
         * If this virtual IRQ was written into a list register, we
@@ -213,14 +275,23 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
         * vgic_change_active_prepare)  and still has to sync back this IRQ,
         * so we release and re-acquire the spin_lock to let the other thread
         * sync back the IRQ.
+        *
+        * When accessing VGIC state from user space, requester_vcpu is
+        * NULL; this is fine because we guarantee that no VCPUs are
+        * running during such an access, so irq->vcpu->cpu is always -1.
         */
        while (irq->vcpu && /* IRQ may have state in an LR somewhere */
               irq->vcpu != requester_vcpu && /* Current thread is not the VCPU thread */
               irq->vcpu->cpu != -1) /* VCPU thread is running */
                cond_resched_lock(&irq->irq_lock);
 
-       irq->active = new_active_state;
-       if (new_active_state)
+       if (irq->hw)
+               vgic_hw_irq_change_active(vcpu, irq, active, !requester_vcpu);
+       else
+               irq->active = active;
+
+       if (irq->active)
                vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
        else
                spin_unlock_irqrestore(&irq->irq_lock, flags);
index 80897102da26ce51ddb95d01da15c61277c27b66..c32d7b93ffd194313f8cc062d2960d6a04a38d16 100644 (file)
@@ -105,6 +105,26 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
                                irq->pending_latch = false;
                }
 
+               /*
+                * Level-triggered mapped IRQs are special because we only
+                * observe rising edges as input to the VGIC.
+                *
+                * If the guest never acked the interrupt we have to sample
+                * the physical line and set the line level, because the
+                * device state could have changed or we simply need to
+                * process the still pending interrupt later.
+                *
+                * If this causes us to lower the level, we have to also clear
+                * the physical active state, since we will otherwise never be
+                * told when the interrupt becomes asserted again.
+                */
+               if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) {
+                       irq->line_level = vgic_get_phys_line_level(irq);
+
+                       if (!irq->line_level)
+                               vgic_irq_set_phys_active(irq, false);
+               }
+
                spin_unlock_irqrestore(&irq->irq_lock, flags);
                vgic_put_irq(vcpu->kvm, irq);
        }
@@ -162,6 +182,15 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
                        val |= GICH_LR_EOI;
        }
 
+       /*
+        * Level-triggered mapped IRQs are special because we only observe
+        * rising edges as input to the VGIC.  We therefore lower the line
+        * level here, so that we can take new virtual IRQs.  See
+        * vgic_v2_fold_lr_state for more info.
+        */
+       if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT))
+               irq->line_level = false;
+
        /* The GICv2 LR only holds five bits of priority. */
        val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT;
 
index f47e8481fa452d2b67aaca6fc6985a4fa05b4e77..6b329414e57a3c16207ab8e5ad81a0cc002b7f51 100644 (file)
@@ -96,6 +96,26 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
                                irq->pending_latch = false;
                }
 
+               /*
+                * Level-triggered mapped IRQs are special because we only
+                * observe rising edges as input to the VGIC.
+                *
+                * If the guest never acked the interrupt we have to sample
+                * the physical line and set the line level, because the
+                * device state could have changed or we simply need to
+                * process the still pending interrupt later.
+                *
+                * If this causes us to lower the level, we have to also clear
+                * the physical active state, since we will otherwise never be
+                * told when the interrupt becomes asserted again.
+                */
+               if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) {
+                       irq->line_level = vgic_get_phys_line_level(irq);
+
+                       if (!irq->line_level)
+                               vgic_irq_set_phys_active(irq, false);
+               }
+
                spin_unlock_irqrestore(&irq->irq_lock, flags);
                vgic_put_irq(vcpu->kvm, irq);
        }
@@ -145,6 +165,15 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
                        val |= ICH_LR_EOI;
        }
 
+       /*
+        * Level-triggered mapped IRQs are special because we only observe
+        * rising edges as input to the VGIC.  We therefore lower the line
+        * level here, so that we can take new virtual IRQs.  See
+        * vgic_v3_fold_lr_state for more info.
+        */
+       if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT))
+               irq->line_level = false;
+
        /*
         * We currently only support Group1 interrupts, which is a
         * known defect. This needs to be addressed at some point.
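The populate/fold hunks for GICv2 and GICv3 above are intentionally identical and implement the resampling loop for a mapped level-triggered interrupt. Stripped of the LR encoding, the life cycle is roughly the following sketch (lr_pending is a placeholder for the GICH_LR_PENDING_BIT/ICH_LR_PENDING_BIT test; everything else is as in the patch):

        /* Guest entry -- vgic_v{2,3}_populate_lr() */
        if (vgic_irq_is_mapped_level(irq) && lr_pending)
                irq->line_level = false;        /* we only observe rising edges,
                                                 * so lower our view of the line */

        /* Guest exit -- vgic_v{2,3}_fold_lr_state() */
        if (vgic_irq_is_mapped_level(irq) && lr_pending) {
                /* Guest never acked the interrupt: resample the real line */
                irq->line_level = vgic_get_phys_line_level(irq);

                /* Line has dropped: deactivate the physical side so that a
                 * new rising edge can reach us again */
                if (!irq->line_level)
                        vgic_irq_set_phys_active(irq, false);
        }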
index ecb8e25f5fe56d69065757a80c44c2d4a532bbcb..c7c5ef190afa0c3984d5b9051f59ca72c696eb20 100644 (file)
@@ -144,6 +144,38 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
        kfree(irq);
 }
 
+void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
+{
+       WARN_ON(irq_set_irqchip_state(irq->host_irq,
+                                     IRQCHIP_STATE_PENDING,
+                                     pending));
+}
+
+bool vgic_get_phys_line_level(struct vgic_irq *irq)
+{
+       bool line_level;
+
+       BUG_ON(!irq->hw);
+
+       if (irq->get_input_level)
+               return irq->get_input_level(irq->intid);
+
+       WARN_ON(irq_get_irqchip_state(irq->host_irq,
+                                     IRQCHIP_STATE_PENDING,
+                                     &line_level));
+       return line_level;
+}
+
+/* Set/Clear the physical active state */
+void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
+{
+       BUG_ON(!irq->hw);
+       WARN_ON(irq_set_irqchip_state(irq->host_irq,
+                                     IRQCHIP_STATE_ACTIVE,
+                                     active));
+}
+
 /**
  * kvm_vgic_target_oracle - compute the target vcpu for an irq
  *
@@ -413,7 +445,8 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
 
 /* @irq->irq_lock must be held */
 static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
-                           unsigned int host_irq)
+                           unsigned int host_irq,
+                           bool (*get_input_level)(int vintid))
 {
        struct irq_desc *desc;
        struct irq_data *data;
@@ -433,6 +466,7 @@ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
        irq->hw = true;
        irq->host_irq = host_irq;
        irq->hwintid = data->hwirq;
+       irq->get_input_level = get_input_level;
        return 0;
 }
 
@@ -441,10 +475,11 @@ static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
 {
        irq->hw = false;
        irq->hwintid = 0;
+       irq->get_input_level = NULL;
 }
 
 int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
-                         u32 vintid)
+                         u32 vintid, bool (*get_input_level)(int vintid))
 {
        struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
        unsigned long flags;
@@ -453,7 +488,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
        BUG_ON(!irq);
 
        spin_lock_irqsave(&irq->irq_lock, flags);
-       ret = kvm_vgic_map_irq(vcpu, irq, host_irq);
+       ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level);
        spin_unlock_irqrestore(&irq->irq_lock, flags);
        vgic_put_irq(vcpu->kvm, irq);
 
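The new get_input_level callback lets the owner of a forwarded interrupt report the line level itself instead of having the VGIC query the physical distributor through irq_get_irqchip_state(). The arch timer is the in-tree user in this series; its hookup looks roughly like this (condensed from memory of virt/kvm/arm/arch_timer.c, not part of this hunk):

        bool kvm_arch_timer_get_input_level(int vintid)
        {
                struct kvm_vcpu *vcpu = kvm_arm_get_running_vcpu();
                struct arch_timer_context *timer;

                if (vintid == vcpu_vtimer(vcpu)->irq.irq)
                        timer = vcpu_vtimer(vcpu);
                else
                        BUG();  /* only the vtimer is mapped so far */

                return kvm_timer_should_fire(timer);
        }

        /* when enabling the timer for a VCPU: */
        ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq,
                                    kvm_arch_timer_get_input_level);

A user with no cheap way to sample its own line can pass NULL instead and fall back to the irqchip-state query in vgic_get_phys_line_level().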
index efbcf8f96f9c1a1bec87ce874103027c10de47ac..12c37b89f7a38212c5eec4a115b9e2bb20d28fdc 100644 (file)
@@ -104,6 +104,11 @@ static inline bool irq_is_pending(struct vgic_irq *irq)
                return irq->pending_latch || irq->line_level;
 }
 
+static inline bool vgic_irq_is_mapped_level(struct vgic_irq *irq)
+{
+       return irq->config == VGIC_CONFIG_LEVEL && irq->hw;
+}
+
 /*
  * This struct provides an intermediate representation of the fields contained
  * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC
@@ -140,6 +145,9 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
 struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
                              u32 intid);
 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
+bool vgic_get_phys_line_level(struct vgic_irq *irq);
+void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending);
+void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active);
 bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
                           unsigned long flags);
 void vgic_kick_vcpus(struct kvm *kvm);