Merge tag 'iommu-updates-v5.4' of git://git.kernel.org/pub/scm/linux/kernel/git/joro...
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 16 Sep 2019 21:14:40 +0000 (14:14 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 16 Sep 2019 21:14:40 +0000 (14:14 -0700)
Pull iommu updates from Joerg Roedel:

 - batched unmap support for the IOMMU-API (see the sketch after this list)

 - support for unlocked command queueing in the ARM-SMMU driver

 - rework the ATS support in the ARM-SMMU driver

 - more refactoring in the ARM-SMMU driver to support hardware
   implementation-specific quirks and errata

 - bounce buffering DMA-API implementation in the Intel VT-d driver
   for untrusted devices (like Thunderbolt devices)

 - fixes for runtime PM support in the OMAP iommu driver

 - MT8183 IOMMU support in the Mediatek IOMMU driver

 - rework of the way the IOMMU core sets the default domain type for
   groups. Changing the default domain type on x86 does not require two
   kernel parameters anymore.

 - more small fixes and cleanups
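
As a rough illustration of the batched unmap interface, a caller can now
gather several unmaps and issue a single IOTLB sync at the end. This is
only a sketch, assuming the iommu_iotlb_gather_init(), iommu_unmap_fast()
and iommu_tlb_sync() helpers this series adds or extends; the function
itself is hypothetical and not part of the patches:

	/* Unmap a set of ranges, then flush the IOTLB once. */
	static void batched_unmap_example(struct iommu_domain *domain,
					  unsigned long *iovas,
					  size_t *sizes, int count)
	{
		struct iommu_iotlb_gather gather;
		int i;

		iommu_iotlb_gather_init(&gather);

		for (i = 0; i < count; i++)
			iommu_unmap_fast(domain, iovas[i], sizes[i], &gather);

		/* One sync covers everything collected above. */
		iommu_tlb_sync(domain, &gather);
	}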

* tag 'iommu-updates-v5.4' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (113 commits)
  iommu/vt-d: Declare Broadwell igfx dmar support snafu
  iommu/vt-d: Add Scalable Mode fault information
  iommu/vt-d: Use bounce buffer for untrusted devices
  iommu/vt-d: Add trace events for device dma map/unmap
  iommu/vt-d: Don't switch off swiotlb if bounce page is used
  iommu/vt-d: Check whether device requires bounce buffer
  swiotlb: Split size parameter to map/unmap APIs
  iommu/omap: Mark pm functions __maybe_unused
  iommu/ipmmu-vmsa: Disable cache snoop transactions on R-Car Gen3
  iommu/ipmmu-vmsa: Move IMTTBCR_SL0_TWOBIT_* to restore sort order
  iommu: Don't use sme_active() in generic code
  iommu/arm-smmu-v3: Fix build error without CONFIG_PCI_ATS
  iommu/qcom: Use struct_size() helper
  iommu: Remove wrong default domain comments
  iommu/dma: Fix for dereferencing before null checking
  iommu/mediatek: Clean up struct mtk_smi_iommu
  memory: mtk-smi: Get rid of need_larbid
  iommu/mediatek: Fix VLD_PA_RNG register backup when suspend
  memory: mtk-smi: Add bus_sel for mt8183
  memory: mtk-smi: Invoke pm runtime_callback to enable clocks
  ...

63 files changed:
Documentation/admin-guide/kernel-parameters.txt
Documentation/devicetree/bindings/iommu/mediatek,iommu.txt
Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt
Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt
MAINTAINERS
arch/arm/mach-omap2/Makefile
arch/arm/mach-omap2/omap-iommu.c [new file with mode: 0644]
arch/ia64/include/asm/iommu.h
arch/ia64/kernel/pci-dma.c
arch/x86/include/asm/iommu.h
arch/x86/kernel/pci-dma.c
drivers/gpu/drm/panfrost/panfrost_mmu.c
drivers/iommu/Kconfig
drivers/iommu/Makefile
drivers/iommu/amd_iommu.c
drivers/iommu/amd_iommu.h [new file with mode: 0644]
drivers/iommu/amd_iommu_init.c
drivers/iommu/amd_iommu_quirks.c [new file with mode: 0644]
drivers/iommu/amd_iommu_types.h
drivers/iommu/arm-smmu-impl.c [new file with mode: 0644]
drivers/iommu/arm-smmu-regs.h [deleted file]
drivers/iommu/arm-smmu-v3.c
drivers/iommu/arm-smmu.c
drivers/iommu/arm-smmu.h [new file with mode: 0644]
drivers/iommu/dma-iommu.c
drivers/iommu/dmar.c
drivers/iommu/exynos-iommu.c
drivers/iommu/intel-iommu.c
drivers/iommu/intel-trace.c [new file with mode: 0644]
drivers/iommu/intel_irq_remapping.c
drivers/iommu/io-pgtable-arm-v7s.c
drivers/iommu/io-pgtable-arm.c
drivers/iommu/iommu.c
drivers/iommu/iova.c
drivers/iommu/ipmmu-vmsa.c
drivers/iommu/msm_iommu.c
drivers/iommu/mtk_iommu.c
drivers/iommu/mtk_iommu.h
drivers/iommu/mtk_iommu_v1.c
drivers/iommu/omap-iommu.c
drivers/iommu/omap-iommu.h
drivers/iommu/qcom_iommu.c
drivers/iommu/rockchip-iommu.c
drivers/iommu/s390-iommu.c
drivers/iommu/tegra-gart.c
drivers/iommu/tegra-smmu.c
drivers/iommu/virtio-iommu.c
drivers/memory/mtk-smi.c
drivers/vfio/vfio_iommu_type1.c
drivers/xen/swiotlb-xen.c
include/dt-bindings/memory/mt8183-larb-port.h [new file with mode: 0644]
include/linux/amd-iommu.h
include/linux/blk_types.h
include/linux/intel-iommu.h
include/linux/io-pgtable.h
include/linux/iommu.h
include/linux/omap-iommu.h
include/linux/platform_data/iommu-omap.h
include/linux/swiotlb.h
include/soc/mediatek/smi.h
include/trace/events/intel_iommu.h [new file with mode: 0644]
kernel/dma/direct.c
kernel/dma/swiotlb.c

index 4c1971960afa30484cfe0d2533e9cf28d003662c..d31ffa110461156a8abdf2703514e57e3f42beb9 100644 (file)
                        Note that using this option lowers the security
                        provided by tboot because it makes the system
                        vulnerable to DMA attacks.
+               nobounce [Default off]
+                       Disable the bounce buffer for untrusted devices such
+                       as Thunderbolt devices. This treats untrusted devices
+                       as trusted ones and may therefore expose the system
+                       to DMA attacks.
 
        intel_idle.max_cstate=  [KNL,HW,ACPI,X86]
                        0       disables intel_idle and fall back on acpi_idle.
                          synchronously.
 
        iommu.passthrough=
-                       [ARM64] Configure DMA to bypass the IOMMU by default.
+                       [ARM64, X86] Configure DMA to bypass the IOMMU by default.
                        Format: { "0" | "1" }
                        0 - Use IOMMU translation for DMA.
                        1 - Bypass the IOMMU for DMA.
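
For illustration only (usage, not part of the patch): with this change a
single option on the kernel command line now selects passthrough mode on
either architecture, e.g.

	iommu.passthrough=1

The older x86 "iommu=pt"/"iommu=nopt" options keep working and, as the
arch/x86/kernel/pci-dma.c hunk further below shows, are now routed through
iommu_set_default_passthrough()/iommu_set_default_translated().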
index 6922db598deface3f76f32e605499cf096060968..ce59a505f5a4c21ea288688b4d967e061274bc83 100644 (file)
@@ -11,10 +11,23 @@ ARM Short-Descriptor translation table format for address translation.
                |
               m4u (Multimedia Memory Management Unit)
                |
+          +--------+
+          |        |
+      gals0-rx   gals1-rx    (Global Async Local Sync rx)
+          |        |
+          |        |
+      gals0-tx   gals1-tx    (Global Async Local Sync tx)
+          |        |          Some SoCs may have GALS.
+          +--------+
+               |
            SMI Common(Smart Multimedia Interface Common)
                |
        +----------------+-------
        |                |
+       |             gals-rx        There may be GALS in some larbs.
+       |                |
+       |                |
+       |             gals-tx
        |                |
    SMI larb0        SMI larb1   ... SoCs have several SMI local arbiter(larb).
    (display)         (vdec)
@@ -36,6 +49,10 @@ each local arbiter.
 like display, video decode, and camera. And there are different ports
 in each larb. Take a example, There are many ports like MC, PP, VLD in the
 video decode local arbiter, all these ports are according to the video HW.
+  In some SoCs, there may be a GALS (Global Async Local Sync) module between
+smi-common and m4u, and an additional GALS module between smi-larb and
+smi-common. GALS can be seen as an asynchronous FIFO which helps to
+synchronize modules running at different clock frequencies.
 
 Required properties:
 - compatible : must be one of the following string:
@@ -44,18 +61,25 @@ Required properties:
        "mediatek,mt7623-m4u", "mediatek,mt2701-m4u" for mt7623 which uses
                                                     generation one m4u HW.
        "mediatek,mt8173-m4u" for mt8173 which uses generation two m4u HW.
+       "mediatek,mt8183-m4u" for mt8183 which uses generation two m4u HW.
 - reg : m4u register base and size.
 - interrupts : the interrupt of m4u.
 - clocks : must contain one entry for each clock-names.
-- clock-names : must be "bclk", It is the block clock of m4u.
+- clock-names : Only 1 optional clock:
+  - "bclk": the block clock of m4u.
+  Here is the list of SoCs which require this "bclk":
+  - mt2701, mt2712, mt7623 and mt8173.
+  Note that if there is no "bclk", the m4u uses the EMI clock, which is
+  always enabled before the kernel starts.
 - mediatek,larbs : List of phandle to the local arbiters in the current Socs.
        Refer to bindings/memory-controllers/mediatek,smi-larb.txt. It must sort
        according to the local arbiter index, like larb0, larb1, larb2...
 - iommu-cells : must be 1. This is the mtk_m4u_id according to the HW.
        Specifies the mtk_m4u_id as defined in
        dt-binding/memory/mt2701-larb-port.h for mt2701, mt7623
-       dt-binding/memory/mt2712-larb-port.h for mt2712, and
-       dt-binding/memory/mt8173-larb-port.h for mt8173.
+       dt-binding/memory/mt2712-larb-port.h for mt2712,
+       dt-binding/memory/mt8173-larb-port.h for mt8173, and
+       dt-binding/memory/mt8183-larb-port.h for mt8183.
 
 Example:
        iommu: iommu@10205000 {
index e937ddd871a6bffe58f4c536eea311f4be445349..b478ade4da654e3c1206a209db6f58dff6b7ee07 100644 (file)
@@ -2,9 +2,10 @@ SMI (Smart Multimedia Interface) Common
 
 The hardware block diagram please check bindings/iommu/mediatek,iommu.txt
 
-Mediatek SMI have two generations of HW architecture, mt2712 and mt8173 use
-the second generation of SMI HW while mt2701 uses the first generation HW of
-SMI.
+Mediatek SMI has two generations of HW architecture; here is which
+generation each SoC uses:
+generation 1: mt2701 and mt7623.
+generation 2: mt2712, mt8173 and mt8183.
 
 There's slight differences between the two SMI, for generation 2, the
 register which control the iommu port is at each larb's register base. But
@@ -19,6 +20,7 @@ Required properties:
        "mediatek,mt2712-smi-common"
        "mediatek,mt7623-smi-common", "mediatek,mt2701-smi-common"
        "mediatek,mt8173-smi-common"
+       "mediatek,mt8183-smi-common"
 - reg : the register and size of the SMI block.
 - power-domains : a phandle to the power domain of this local arbiter.
 - clocks : Must contain an entry for each entry in clock-names.
@@ -30,6 +32,10 @@ Required properties:
            They may be the same if both source clocks are the same.
   - "async" : asynchronous clock, it help transform the smi clock into the emi
              clock domain, this clock is only needed by generation 1 smi HW.
+  and these 2 optional clocks for generation 2 smi HW:
+  - "gals0": the path0 clock of GALS (Global Async Local Sync).
+  - "gals1": the path1 clock of GALS (Global Async Local Sync).
+  Here is the list of SoCs which have this GALS: mt8183.
 
 Example:
        smi_common: smi@14022000 {
index 94eddcae77ab5f08af6b13885c3a3122cd53ce41..4b369b3e1a69b1d421fbca8492b2bcd2bc40e111 100644 (file)
@@ -8,6 +8,7 @@ Required properties:
                "mediatek,mt2712-smi-larb"
                "mediatek,mt7623-smi-larb", "mediatek,mt2701-smi-larb"
                "mediatek,mt8173-smi-larb"
+               "mediatek,mt8183-smi-larb"
 - reg : the register and size of this local arbiter.
 - mediatek,smi : a phandle to the smi_common node.
 - power-domains : a phandle to the power domain of this local arbiter.
@@ -16,6 +17,9 @@ Required properties:
   - "apb" : Advanced Peripheral Bus clock, It's the clock for setting
            the register.
   - "smi" : It's the clock for transfer data and command.
+  and this optional clock name:
+  - "gals": the clock for GALS (Global Async Local Sync).
+  Here is the list of SoCs which have this GALS: mt8183.
 
 Required property for mt2701, mt2712 and mt7623:
 - mediatek,larb-id :the hardware id of this larb.
index 7fd34066b34aa30007cfde8bf9c48a5cb4c38172..59a8ec493d7af8112cf331d7ec9ab71487eab64d 100644 (file)
@@ -1342,8 +1342,7 @@ M:        Will Deacon <will@kernel.org>
 R:     Robin Murphy <robin.murphy@arm.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
-F:     drivers/iommu/arm-smmu.c
-F:     drivers/iommu/arm-smmu-v3.c
+F:     drivers/iommu/arm-smmu*
 F:     drivers/iommu/io-pgtable-arm.c
 F:     drivers/iommu/io-pgtable-arm-v7s.c
 
index 60065055162199dc4188fff1e448acb37e6d4627..d4f11c5070aeeec0294e14d44e359799e0a65bc9 100644 (file)
@@ -229,3 +229,5 @@ include/generated/ti-pm-asm-offsets.h: arch/arm/mach-omap2/pm-asm-offsets.s FORC
 $(obj)/sleep33xx.o $(obj)/sleep43xx.o: include/generated/ti-pm-asm-offsets.h
 
 targets += pm-asm-offsets.s
+
+obj-$(CONFIG_OMAP_IOMMU)               += omap-iommu.o
diff --git a/arch/arm/mach-omap2/omap-iommu.c b/arch/arm/mach-omap2/omap-iommu.c
new file mode 100644 (file)
index 0000000..f1a6ece
--- /dev/null
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * OMAP IOMMU quirks for various TI SoCs
+ *
+ * Copyright (C) 2015-2019 Texas Instruments Incorporated - http://www.ti.com/
+ *      Suman Anna <s-anna@ti.com>
+ */
+
+#include <linux/platform_device.h>
+#include <linux/err.h>
+
+#include "omap_hwmod.h"
+#include "omap_device.h"
+#include "powerdomain.h"
+
+int omap_iommu_set_pwrdm_constraint(struct platform_device *pdev, bool request,
+                                   u8 *pwrst)
+{
+       struct powerdomain *pwrdm;
+       struct omap_device *od;
+       u8 next_pwrst;
+
+       od = to_omap_device(pdev);
+       if (!od)
+               return -ENODEV;
+
+       if (od->hwmods_cnt != 1)
+               return -EINVAL;
+
+       pwrdm = omap_hwmod_get_pwrdm(od->hwmods[0]);
+       if (!pwrdm)
+               return -EINVAL;
+
+       if (request)
+               *pwrst = pwrdm_read_next_pwrst(pwrdm);
+
+       if (*pwrst > PWRDM_POWER_RET)
+               return 0;
+
+       next_pwrst = request ? PWRDM_POWER_ON : *pwrst;
+
+       return pwrdm_set_next_pwrst(pwrdm, next_pwrst);
+}
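
A hypothetical usage sketch (not part of this patch) to show the calling
convention of the helper above, with "pdev" being the IOMMU's platform
device: the caller saves and raises the power-domain constraint before
touching the IOMMU hardware, then restores the saved state afterwards:

	u8 pwrst;
	int ret;

	ret = omap_iommu_set_pwrdm_constraint(pdev, true, &pwrst);
	if (!ret) {
		/* ... program the IOMMU ... */
		omap_iommu_set_pwrdm_constraint(pdev, false, &pwrst);
	}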
index 7429a72f3f921996643c48d19dc206fe3034b7dd..92aceef63710861abc7c9605fe2c6b15105c3c97 100644 (file)
@@ -8,10 +8,8 @@
 extern void no_iommu_init(void);
 #ifdef CONFIG_INTEL_IOMMU
 extern int force_iommu, no_iommu;
-extern int iommu_pass_through;
 extern int iommu_detected;
 #else
-#define iommu_pass_through     (0)
 #define no_iommu               (1)
 #define iommu_detected         (0)
 #endif
index fe988c49f01ce6ae844355731ddc50f01fef3df1..f5d49cd3fbb01a9933f5d547682d4adbdde2617e 100644 (file)
@@ -22,8 +22,6 @@ int force_iommu __read_mostly = 1;
 int force_iommu __read_mostly;
 #endif
 
-int iommu_pass_through;
-
 static int __init pci_iommu_init(void)
 {
        if (iommu_detected)
index baedab8ac5385f7d40ccbe7f17e583f8589bdb49..b91623d521d9f0ffeb23e825221353cba05e0ced 100644 (file)
@@ -4,7 +4,6 @@
 
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
-extern int iommu_pass_through;
 
 /* 10 seconds */
 #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
index f62b498b18fb7f5e7cc6766feba88827e8fce079..fa4352dce491c855ba6f0a1866390c750b4b42a9 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/dma-direct.h>
 #include <linux/dma-debug.h>
+#include <linux/iommu.h>
 #include <linux/dmar.h>
 #include <linux/export.h>
 #include <linux/memblock.h>
@@ -34,21 +35,6 @@ int no_iommu __read_mostly;
 /* Set this to 1 if there is a HW IOMMU in the system */
 int iommu_detected __read_mostly = 0;
 
-/*
- * This variable becomes 1 if iommu=pt is passed on the kernel command line.
- * If this variable is 1, IOMMU implementations do no DMA translation for
- * devices and allow every device to access to whole physical memory. This is
- * useful if a user wants to use an IOMMU only for KVM device assignment to
- * guests and not for driver dma translation.
- * It is also possible to disable by default in kernel config, and enable with
- * iommu=nopt at boot time.
- */
-#ifdef CONFIG_IOMMU_DEFAULT_PASSTHROUGH
-int iommu_pass_through __read_mostly = 1;
-#else
-int iommu_pass_through __read_mostly;
-#endif
-
 extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
 
 void __init pci_iommu_alloc(void)
@@ -120,9 +106,9 @@ static __init int iommu_setup(char *p)
                        swiotlb = 1;
 #endif
                if (!strncmp(p, "pt", 2))
-                       iommu_pass_through = 1;
+                       iommu_set_default_passthrough(true);
                if (!strncmp(p, "nopt", 4))
-                       iommu_pass_through = 0;
+                       iommu_set_default_translated(true);
 
                gart_parse_options(p);
 
index 92ac995dd9c66be77484f5f634d463098607f91a..6e8145c36e933fc97dbf01214c8c93d9a4f10576 100644 (file)
@@ -222,7 +222,7 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
                size_t unmapped_page;
                size_t pgsize = get_pgsize(iova, len - unmapped_len);
 
-               unmapped_page = ops->unmap(ops, iova, pgsize);
+               unmapped_page = ops->unmap(ops, iova, pgsize, NULL);
                if (!unmapped_page)
                        break;
 
@@ -247,20 +247,28 @@ static void mmu_tlb_inv_context_s1(void *cookie)
        mmu_hw_do_operation(pfdev, 0, 0, ~0UL, AS_COMMAND_FLUSH_MEM);
 }
 
-static void mmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
-                                    size_t granule, bool leaf, void *cookie)
-{}
-
 static void mmu_tlb_sync_context(void *cookie)
 {
        //struct panfrost_device *pfdev = cookie;
        // TODO: Wait 1000 GPU cycles for HW_ISSUE_6367/T60X
 }
 
-static const struct iommu_gather_ops mmu_tlb_ops = {
+static void mmu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule,
+                              void *cookie)
+{
+       mmu_tlb_sync_context(cookie);
+}
+
+static void mmu_tlb_flush_leaf(unsigned long iova, size_t size, size_t granule,
+                              void *cookie)
+{
+       mmu_tlb_sync_context(cookie);
+}
+
+static const struct iommu_flush_ops mmu_tlb_ops = {
        .tlb_flush_all  = mmu_tlb_inv_context_s1,
-       .tlb_add_flush  = mmu_tlb_inv_range_nosync,
-       .tlb_sync       = mmu_tlb_sync_context,
+       .tlb_flush_walk = mmu_tlb_flush_walk,
+       .tlb_flush_leaf = mmu_tlb_flush_leaf,
 };
 
 static const char *access_type_name(struct panfrost_device *pfdev,
index e15cdcd8cb3c8c64597707aa54398444f6a1ac4c..a4ddeade8ac4d72691d13f1a351f8b688e00509d 100644 (file)
@@ -182,6 +182,7 @@ config INTEL_IOMMU
        select IOMMU_IOVA
        select NEED_DMA_MAP_STATE
        select DMAR_TABLE
+       select SWIOTLB
        help
          DMA remapping (DMAR) devices support enables independent address
          translations for Direct Memory Access (DMA) from devices.
index f13f36ae1af652836254ba07315f99303fba6f88..4f405f926e739cdd4d00937f9b57b4d42ee06b30 100644 (file)
@@ -10,13 +10,14 @@ obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
 obj-$(CONFIG_IOMMU_IOVA) += iova.o
 obj-$(CONFIG_OF_IOMMU) += of_iommu.o
 obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o
-obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
+obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o amd_iommu_quirks.o
 obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd_iommu_debugfs.o
 obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
-obj-$(CONFIG_ARM_SMMU) += arm-smmu.o
+obj-$(CONFIG_ARM_SMMU) += arm-smmu.o arm-smmu-impl.o
 obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
 obj-$(CONFIG_DMAR_TABLE) += dmar.o
 obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o
+obj-$(CONFIG_INTEL_IOMMU) += intel-trace.o
 obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += intel-iommu-debugfs.o
 obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o
 obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o
index 61de81965c44ed95b52ef857b4f31a4cb8519a28..1ed3b98324bac09ce8399b2ee0ad445e08802d20 100644 (file)
@@ -436,7 +436,7 @@ static int iommu_init_device(struct device *dev)
         * invalid address), we ignore the capability for the device so
         * it'll be forced to go into translation mode.
         */
-       if ((iommu_pass_through || !amd_iommu_force_isolation) &&
+       if ((iommu_default_passthrough() || !amd_iommu_force_isolation) &&
            dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) {
                struct amd_iommu *iommu;
 
@@ -2256,7 +2256,7 @@ static int amd_iommu_add_device(struct device *dev)
 
        BUG_ON(!dev_data);
 
-       if (iommu_pass_through || dev_data->iommu_v2)
+       if (dev_data->iommu_v2)
                iommu_request_dm_for_dev(dev);
 
        /* Domains are initialized for this device - have a look what we ended up with */
@@ -2577,7 +2577,9 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 
                        bus_addr  = address + s->dma_address + (j << PAGE_SHIFT);
                        phys_addr = (sg_phys(s) & PAGE_MASK) + (j << PAGE_SHIFT);
-                       ret = iommu_map_page(domain, bus_addr, phys_addr, PAGE_SIZE, prot, GFP_ATOMIC);
+                       ret = iommu_map_page(domain, bus_addr, phys_addr,
+                                            PAGE_SIZE, prot,
+                                            GFP_ATOMIC | __GFP_NOWARN);
                        if (ret)
                                goto out_unmap;
 
@@ -2835,7 +2837,7 @@ int __init amd_iommu_init_api(void)
 
 int __init amd_iommu_init_dma_ops(void)
 {
-       swiotlb        = (iommu_pass_through || sme_me_mask) ? 1 : 0;
+       swiotlb        = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0;
        iommu_detected = 1;
 
        if (amd_iommu_unmap_flush)
@@ -3085,7 +3087,8 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
 }
 
 static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
-                          size_t page_size)
+                             size_t page_size,
+                             struct iommu_iotlb_gather *gather)
 {
        struct protection_domain *domain = to_pdomain(dom);
        size_t unmap_size;
@@ -3226,9 +3229,10 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
        domain_flush_complete(dom);
 }
 
-static void amd_iommu_iotlb_range_add(struct iommu_domain *domain,
-                                     unsigned long iova, size_t size)
+static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
+                                struct iommu_iotlb_gather *gather)
 {
+       amd_iommu_flush_iotlb_all(domain);
 }
 
 const struct iommu_ops amd_iommu_ops = {
@@ -3249,8 +3253,7 @@ const struct iommu_ops amd_iommu_ops = {
        .is_attach_deferred = amd_iommu_is_attach_deferred,
        .pgsize_bitmap  = AMD_IOMMU_PGSIZES,
        .flush_iotlb_all = amd_iommu_flush_iotlb_all,
-       .iotlb_range_add = amd_iommu_iotlb_range_add,
-       .iotlb_sync = amd_iommu_flush_iotlb_all,
+       .iotlb_sync = amd_iommu_iotlb_sync,
 };
 
 /*****************************************************************************
@@ -4343,13 +4346,62 @@ static const struct irq_domain_ops amd_ir_domain_ops = {
        .deactivate = irq_remapping_deactivate,
 };
 
+int amd_iommu_activate_guest_mode(void *data)
+{
+       struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
+       struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
+
+       if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
+           !entry || entry->lo.fields_vapic.guest_mode)
+               return 0;
+
+       entry->lo.val = 0;
+       entry->hi.val = 0;
+
+       entry->lo.fields_vapic.guest_mode  = 1;
+       entry->lo.fields_vapic.ga_log_intr = 1;
+       entry->hi.fields.ga_root_ptr       = ir_data->ga_root_ptr;
+       entry->hi.fields.vector            = ir_data->ga_vector;
+       entry->lo.fields_vapic.ga_tag      = ir_data->ga_tag;
+
+       return modify_irte_ga(ir_data->irq_2_irte.devid,
+                             ir_data->irq_2_irte.index, entry, NULL);
+}
+EXPORT_SYMBOL(amd_iommu_activate_guest_mode);
+
+int amd_iommu_deactivate_guest_mode(void *data)
+{
+       struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
+       struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
+       struct irq_cfg *cfg = ir_data->cfg;
+
+       if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
+           !entry || !entry->lo.fields_vapic.guest_mode)
+               return 0;
+
+       entry->lo.val = 0;
+       entry->hi.val = 0;
+
+       entry->lo.fields_remap.dm          = apic->irq_dest_mode;
+       entry->lo.fields_remap.int_type    = apic->irq_delivery_mode;
+       entry->hi.fields.vector            = cfg->vector;
+       entry->lo.fields_remap.destination =
+                               APICID_TO_IRTE_DEST_LO(cfg->dest_apicid);
+       entry->hi.fields.destination =
+                               APICID_TO_IRTE_DEST_HI(cfg->dest_apicid);
+
+       return modify_irte_ga(ir_data->irq_2_irte.devid,
+                             ir_data->irq_2_irte.index, entry, NULL);
+}
+EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode);
+
 static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
 {
+       int ret;
        struct amd_iommu *iommu;
        struct amd_iommu_pi_data *pi_data = vcpu_info;
        struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data;
        struct amd_ir_data *ir_data = data->chip_data;
-       struct irte_ga *irte = (struct irte_ga *) ir_data->entry;
        struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
        struct iommu_dev_data *dev_data = search_dev_data(irte_info->devid);
 
@@ -4360,6 +4412,7 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
        if (!dev_data || !dev_data->use_vapic)
                return 0;
 
+       ir_data->cfg = irqd_cfg(data);
        pi_data->ir_data = ir_data;
 
        /* Note:
@@ -4378,37 +4431,24 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
 
        pi_data->prev_ga_tag = ir_data->cached_ga_tag;
        if (pi_data->is_guest_mode) {
-               /* Setting */
-               irte->hi.fields.ga_root_ptr = (pi_data->base >> 12);
-               irte->hi.fields.vector = vcpu_pi_info->vector;
-               irte->lo.fields_vapic.ga_log_intr = 1;
-               irte->lo.fields_vapic.guest_mode = 1;
-               irte->lo.fields_vapic.ga_tag = pi_data->ga_tag;
-
-               ir_data->cached_ga_tag = pi_data->ga_tag;
+               ir_data->ga_root_ptr = (pi_data->base >> 12);
+               ir_data->ga_vector = vcpu_pi_info->vector;
+               ir_data->ga_tag = pi_data->ga_tag;
+               ret = amd_iommu_activate_guest_mode(ir_data);
+               if (!ret)
+                       ir_data->cached_ga_tag = pi_data->ga_tag;
        } else {
-               /* Un-Setting */
-               struct irq_cfg *cfg = irqd_cfg(data);
-
-               irte->hi.val = 0;
-               irte->lo.val = 0;
-               irte->hi.fields.vector = cfg->vector;
-               irte->lo.fields_remap.guest_mode = 0;
-               irte->lo.fields_remap.destination =
-                               APICID_TO_IRTE_DEST_LO(cfg->dest_apicid);
-               irte->hi.fields.destination =
-                               APICID_TO_IRTE_DEST_HI(cfg->dest_apicid);
-               irte->lo.fields_remap.int_type = apic->irq_delivery_mode;
-               irte->lo.fields_remap.dm = apic->irq_dest_mode;
+               ret = amd_iommu_deactivate_guest_mode(ir_data);
 
                /*
                 * This communicates the ga_tag back to the caller
                 * so that it can do all the necessary clean up.
                 */
-               ir_data->cached_ga_tag = 0;
+               if (!ret)
+                       ir_data->cached_ga_tag = 0;
        }
 
-       return modify_irte_ga(irte_info->devid, irte_info->index, irte, ir_data);
+       return ret;
 }
 
 
diff --git a/drivers/iommu/amd_iommu.h b/drivers/iommu/amd_iommu.h
new file mode 100644 (file)
index 0000000..12d540d
--- /dev/null
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef AMD_IOMMU_H
+#define AMD_IOMMU_H
+
+int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line);
+
+#ifdef CONFIG_DMI
+void amd_iommu_apply_ivrs_quirks(void);
+#else
+static inline void amd_iommu_apply_ivrs_quirks(void) { }
+#endif
+
+#endif
index 4413aa67000e576a43a7c8d0e8fd79ee2cc0574a..568c52317757ca2924598b69c7329dcfad940356 100644 (file)
@@ -32,6 +32,7 @@
 #include <asm/irq_remapping.h>
 
 #include <linux/crash_dump.h>
+#include "amd_iommu.h"
 #include "amd_iommu_proto.h"
 #include "amd_iommu_types.h"
 #include "irq_remapping.h"
@@ -1002,7 +1003,7 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
        set_iommu_for_device(iommu, devid);
 }
 
-static int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line)
+int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line)
 {
        struct devid_map *entry;
        struct list_head *list;
@@ -1153,6 +1154,8 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
        if (ret)
                return ret;
 
+       amd_iommu_apply_ivrs_quirks();
+
        /*
         * First save the recommended feature enable bits from ACPI
         */
diff --git a/drivers/iommu/amd_iommu_quirks.c b/drivers/iommu/amd_iommu_quirks.c
new file mode 100644 (file)
index 0000000..c235f79
--- /dev/null
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * Quirks for AMD IOMMU
+ *
+ * Copyright (C) 2019 Kai-Heng Feng <kai.heng.feng@canonical.com>
+ */
+
+#ifdef CONFIG_DMI
+#include <linux/dmi.h>
+
+#include "amd_iommu.h"
+
+#define IVHD_SPECIAL_IOAPIC            1
+
+struct ivrs_quirk_entry {
+       u8 id;
+       u16 devid;
+};
+
+enum {
+       DELL_INSPIRON_7375 = 0,
+       DELL_LATITUDE_5495,
+       LENOVO_IDEAPAD_330S_15ARR,
+};
+
+static const struct ivrs_quirk_entry ivrs_ioapic_quirks[][3] __initconst = {
+       /* ivrs_ioapic[4]=00:14.0 ivrs_ioapic[5]=00:00.2 */
+       [DELL_INSPIRON_7375] = {
+               { .id = 4, .devid = 0xa0 },
+               { .id = 5, .devid = 0x2 },
+               {}
+       },
+       /* ivrs_ioapic[4]=00:14.0 */
+       [DELL_LATITUDE_5495] = {
+               { .id = 4, .devid = 0xa0 },
+               {}
+       },
+       /* ivrs_ioapic[32]=00:14.0 */
+       [LENOVO_IDEAPAD_330S_15ARR] = {
+               { .id = 32, .devid = 0xa0 },
+               {}
+       },
+       {}
+};
+
+static int __init ivrs_ioapic_quirk_cb(const struct dmi_system_id *d)
+{
+       const struct ivrs_quirk_entry *i;
+
+       for (i = d->driver_data; i->id != 0 && i->devid != 0; i++)
+               add_special_device(IVHD_SPECIAL_IOAPIC, i->id, (u16 *)&i->devid, 0);
+
+       return 0;
+}
+
+static const struct dmi_system_id ivrs_quirks[] __initconst = {
+       {
+               .callback = ivrs_ioapic_quirk_cb,
+               .ident = "Dell Inspiron 7375",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 7375"),
+               },
+               .driver_data = (void *)&ivrs_ioapic_quirks[DELL_INSPIRON_7375],
+       },
+       {
+               .callback = ivrs_ioapic_quirk_cb,
+               .ident = "Dell Latitude 5495",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Latitude 5495"),
+               },
+               .driver_data = (void *)&ivrs_ioapic_quirks[DELL_LATITUDE_5495],
+       },
+       {
+               .callback = ivrs_ioapic_quirk_cb,
+               .ident = "Lenovo ideapad 330S-15ARR",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "81FB"),
+               },
+               .driver_data = (void *)&ivrs_ioapic_quirks[LENOVO_IDEAPAD_330S_15ARR],
+       },
+       {}
+};
+
+void __init amd_iommu_apply_ivrs_quirks(void)
+{
+       dmi_check_system(ivrs_quirks);
+}
+#endif
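
For reference, each table entry above hard-codes what a user could also
pass by hand on the kernel command line; the Lenovo entry, for instance,
corresponds to the manual workaround noted in its comment:

	ivrs_ioapic[32]=00:14.0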
index 64edd5a9694cc06400f70692a654f5c006956ddd..9ac229e92b07475f30010338e427cb146821edc1 100644 (file)
@@ -873,6 +873,15 @@ struct amd_ir_data {
        struct msi_msg msi_entry;
        void *entry;    /* Pointer to union irte or struct irte_ga */
        void *ref;      /* Pointer to the actual irte */
+
+       /**
+        * Store information for activate/de-activate
+        * Guest virtual APIC mode during runtime.
+        */
+       struct irq_cfg *cfg;
+       int ga_vector;
+       int ga_root_ptr;
+       int ga_tag;
 };
 
 struct amd_irte_ops {
diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c
new file mode 100644 (file)
index 0000000..5c87a38
--- /dev/null
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Miscellaneous Arm SMMU implementation and integration quirks
+// Copyright (C) 2019 Arm Limited
+
+#define pr_fmt(fmt) "arm-smmu: " fmt
+
+#include <linux/bitfield.h>
+#include <linux/of.h>
+
+#include "arm-smmu.h"
+
+
+static int arm_smmu_gr0_ns(int offset)
+{
+       switch(offset) {
+       case ARM_SMMU_GR0_sCR0:
+       case ARM_SMMU_GR0_sACR:
+       case ARM_SMMU_GR0_sGFSR:
+       case ARM_SMMU_GR0_sGFSYNR0:
+       case ARM_SMMU_GR0_sGFSYNR1:
+       case ARM_SMMU_GR0_sGFSYNR2:
+               return offset + 0x400;
+       default:
+               return offset;
+       }
+}
+
+static u32 arm_smmu_read_ns(struct arm_smmu_device *smmu, int page,
+                           int offset)
+{
+       if (page == ARM_SMMU_GR0)
+               offset = arm_smmu_gr0_ns(offset);
+       return readl_relaxed(arm_smmu_page(smmu, page) + offset);
+}
+
+static void arm_smmu_write_ns(struct arm_smmu_device *smmu, int page,
+                             int offset, u32 val)
+{
+       if (page == ARM_SMMU_GR0)
+               offset = arm_smmu_gr0_ns(offset);
+       writel_relaxed(val, arm_smmu_page(smmu, page) + offset);
+}
+
+/* Since we don't care for sGFAR, we can do without 64-bit accessors */
+static const struct arm_smmu_impl calxeda_impl = {
+       .read_reg = arm_smmu_read_ns,
+       .write_reg = arm_smmu_write_ns,
+};
+
+
+struct cavium_smmu {
+       struct arm_smmu_device smmu;
+       u32 id_base;
+};
+
+static int cavium_cfg_probe(struct arm_smmu_device *smmu)
+{
+       static atomic_t context_count = ATOMIC_INIT(0);
+       struct cavium_smmu *cs = container_of(smmu, struct cavium_smmu, smmu);
+       /*
+        * Cavium CN88xx erratum #27704.
+        * Ensure ASID and VMID allocation is unique across all SMMUs in
+        * the system.
+        */
+       cs->id_base = atomic_fetch_add(smmu->num_context_banks, &context_count);
+       dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
+
+       return 0;
+}
+
+static int cavium_init_context(struct arm_smmu_domain *smmu_domain)
+{
+       struct cavium_smmu *cs = container_of(smmu_domain->smmu,
+                                             struct cavium_smmu, smmu);
+
+       if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
+               smmu_domain->cfg.vmid += cs->id_base;
+       else
+               smmu_domain->cfg.asid += cs->id_base;
+
+       return 0;
+}
+
+static const struct arm_smmu_impl cavium_impl = {
+       .cfg_probe = cavium_cfg_probe,
+       .init_context = cavium_init_context,
+};
+
+static struct arm_smmu_device *cavium_smmu_impl_init(struct arm_smmu_device *smmu)
+{
+       struct cavium_smmu *cs;
+
+       cs = devm_kzalloc(smmu->dev, sizeof(*cs), GFP_KERNEL);
+       if (!cs)
+               return ERR_PTR(-ENOMEM);
+
+       cs->smmu = *smmu;
+       cs->smmu.impl = &cavium_impl;
+
+       devm_kfree(smmu->dev, smmu);
+
+       return &cs->smmu;
+}
+
+
+#define ARM_MMU500_ACTLR_CPRE          (1 << 1)
+
+#define ARM_MMU500_ACR_CACHE_LOCK      (1 << 26)
+#define ARM_MMU500_ACR_S2CRB_TLBEN     (1 << 10)
+#define ARM_MMU500_ACR_SMTNMB_TLBEN    (1 << 8)
+
+static int arm_mmu500_reset(struct arm_smmu_device *smmu)
+{
+       u32 reg, major;
+       int i;
+       /*
+        * On MMU-500 r2p0 onwards we need to clear ACR.CACHE_LOCK before
+        * writes to the context bank ACTLRs will stick. And we just hope that
+        * Secure has also cleared SACR.CACHE_LOCK for this to take effect...
+        */
+       reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID7);
+       major = FIELD_GET(ID7_MAJOR, reg);
+       reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sACR);
+       if (major >= 2)
+               reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
+       /*
+        * Allow unmatched Stream IDs to allocate bypass
+        * TLB entries for reduced latency.
+        */
+       reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
+       arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sACR, reg);
+
+       /*
+        * Disable MMU-500's not-particularly-beneficial next-page
+        * prefetcher for the sake of errata #841119 and #826419.
+        */
+       for (i = 0; i < smmu->num_context_banks; ++i) {
+               reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR);
+               reg &= ~ARM_MMU500_ACTLR_CPRE;
+               arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, reg);
+       }
+
+       return 0;
+}
+
+static const struct arm_smmu_impl arm_mmu500_impl = {
+       .reset = arm_mmu500_reset,
+};
+
+
+struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu)
+{
+       /*
+        * We will inevitably have to combine model-specific implementation
+        * quirks with platform-specific integration quirks, but everything
+        * we currently support happens to work out as straightforward
+        * mutually-exclusive assignments.
+        */
+       switch (smmu->model) {
+       case ARM_MMU500:
+               smmu->impl = &arm_mmu500_impl;
+               break;
+       case CAVIUM_SMMUV2:
+               return cavium_smmu_impl_init(smmu);
+       default:
+               break;
+       }
+
+       if (of_property_read_bool(smmu->dev->of_node,
+                                 "calxeda,smmu-secure-config-access"))
+               smmu->impl = &calxeda_impl;
+
+       return smmu;
+}
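
As a sketch of how further quirks are expected to plug into this
framework, a hypothetical implementation would supply only the callbacks
it needs and be selected from arm_smmu_impl_init(); the "example" device
and the register value below are invented purely for illustration:

	/* Hypothetical quirk: force sACR to a known value at reset. */
	static int example_smmu_reset(struct arm_smmu_device *smmu)
	{
		arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sACR, 0);
		return 0;
	}

	static const struct arm_smmu_impl example_impl = {
		.reset = example_smmu_reset,
	};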
diff --git a/drivers/iommu/arm-smmu-regs.h b/drivers/iommu/arm-smmu-regs.h
deleted file mode 100644 (file)
index 1c278f7..0000000
+++ /dev/null
@@ -1,210 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * IOMMU API for ARM architected SMMU implementations.
- *
- * Copyright (C) 2013 ARM Limited
- *
- * Author: Will Deacon <will.deacon@arm.com>
- */
-
-#ifndef _ARM_SMMU_REGS_H
-#define _ARM_SMMU_REGS_H
-
-/* Configuration registers */
-#define ARM_SMMU_GR0_sCR0              0x0
-#define sCR0_CLIENTPD                  (1 << 0)
-#define sCR0_GFRE                      (1 << 1)
-#define sCR0_GFIE                      (1 << 2)
-#define sCR0_EXIDENABLE                        (1 << 3)
-#define sCR0_GCFGFRE                   (1 << 4)
-#define sCR0_GCFGFIE                   (1 << 5)
-#define sCR0_USFCFG                    (1 << 10)
-#define sCR0_VMIDPNE                   (1 << 11)
-#define sCR0_PTM                       (1 << 12)
-#define sCR0_FB                                (1 << 13)
-#define sCR0_VMID16EN                  (1 << 31)
-#define sCR0_BSU_SHIFT                 14
-#define sCR0_BSU_MASK                  0x3
-
-/* Auxiliary Configuration register */
-#define ARM_SMMU_GR0_sACR              0x10
-
-/* Identification registers */
-#define ARM_SMMU_GR0_ID0               0x20
-#define ARM_SMMU_GR0_ID1               0x24
-#define ARM_SMMU_GR0_ID2               0x28
-#define ARM_SMMU_GR0_ID3               0x2c
-#define ARM_SMMU_GR0_ID4               0x30
-#define ARM_SMMU_GR0_ID5               0x34
-#define ARM_SMMU_GR0_ID6               0x38
-#define ARM_SMMU_GR0_ID7               0x3c
-#define ARM_SMMU_GR0_sGFSR             0x48
-#define ARM_SMMU_GR0_sGFSYNR0          0x50
-#define ARM_SMMU_GR0_sGFSYNR1          0x54
-#define ARM_SMMU_GR0_sGFSYNR2          0x58
-
-#define ID0_S1TS                       (1 << 30)
-#define ID0_S2TS                       (1 << 29)
-#define ID0_NTS                                (1 << 28)
-#define ID0_SMS                                (1 << 27)
-#define ID0_ATOSNS                     (1 << 26)
-#define ID0_PTFS_NO_AARCH32            (1 << 25)
-#define ID0_PTFS_NO_AARCH32S           (1 << 24)
-#define ID0_CTTW                       (1 << 14)
-#define ID0_NUMIRPT_SHIFT              16
-#define ID0_NUMIRPT_MASK               0xff
-#define ID0_NUMSIDB_SHIFT              9
-#define ID0_NUMSIDB_MASK               0xf
-#define ID0_EXIDS                      (1 << 8)
-#define ID0_NUMSMRG_SHIFT              0
-#define ID0_NUMSMRG_MASK               0xff
-
-#define ID1_PAGESIZE                   (1 << 31)
-#define ID1_NUMPAGENDXB_SHIFT          28
-#define ID1_NUMPAGENDXB_MASK           7
-#define ID1_NUMS2CB_SHIFT              16
-#define ID1_NUMS2CB_MASK               0xff
-#define ID1_NUMCB_SHIFT                        0
-#define ID1_NUMCB_MASK                 0xff
-
-#define ID2_OAS_SHIFT                  4
-#define ID2_OAS_MASK                   0xf
-#define ID2_IAS_SHIFT                  0
-#define ID2_IAS_MASK                   0xf
-#define ID2_UBS_SHIFT                  8
-#define ID2_UBS_MASK                   0xf
-#define ID2_PTFS_4K                    (1 << 12)
-#define ID2_PTFS_16K                   (1 << 13)
-#define ID2_PTFS_64K                   (1 << 14)
-#define ID2_VMID16                     (1 << 15)
-
-#define ID7_MAJOR_SHIFT                        4
-#define ID7_MAJOR_MASK                 0xf
-
-/* Global TLB invalidation */
-#define ARM_SMMU_GR0_TLBIVMID          0x64
-#define ARM_SMMU_GR0_TLBIALLNSNH       0x68
-#define ARM_SMMU_GR0_TLBIALLH          0x6c
-#define ARM_SMMU_GR0_sTLBGSYNC         0x70
-#define ARM_SMMU_GR0_sTLBGSTATUS       0x74
-#define sTLBGSTATUS_GSACTIVE           (1 << 0)
-
-/* Stream mapping registers */
-#define ARM_SMMU_GR0_SMR(n)            (0x800 + ((n) << 2))
-#define SMR_VALID                      (1 << 31)
-#define SMR_MASK_SHIFT                 16
-#define SMR_ID_SHIFT                   0
-
-#define ARM_SMMU_GR0_S2CR(n)           (0xc00 + ((n) << 2))
-#define S2CR_CBNDX_SHIFT               0
-#define S2CR_CBNDX_MASK                        0xff
-#define S2CR_EXIDVALID                 (1 << 10)
-#define S2CR_TYPE_SHIFT                        16
-#define S2CR_TYPE_MASK                 0x3
-enum arm_smmu_s2cr_type {
-       S2CR_TYPE_TRANS,
-       S2CR_TYPE_BYPASS,
-       S2CR_TYPE_FAULT,
-};
-
-#define S2CR_PRIVCFG_SHIFT             24
-#define S2CR_PRIVCFG_MASK              0x3
-enum arm_smmu_s2cr_privcfg {
-       S2CR_PRIVCFG_DEFAULT,
-       S2CR_PRIVCFG_DIPAN,
-       S2CR_PRIVCFG_UNPRIV,
-       S2CR_PRIVCFG_PRIV,
-};
-
-/* Context bank attribute registers */
-#define ARM_SMMU_GR1_CBAR(n)           (0x0 + ((n) << 2))
-#define CBAR_VMID_SHIFT                        0
-#define CBAR_VMID_MASK                 0xff
-#define CBAR_S1_BPSHCFG_SHIFT          8
-#define CBAR_S1_BPSHCFG_MASK           3
-#define CBAR_S1_BPSHCFG_NSH            3
-#define CBAR_S1_MEMATTR_SHIFT          12
-#define CBAR_S1_MEMATTR_MASK           0xf
-#define CBAR_S1_MEMATTR_WB             0xf
-#define CBAR_TYPE_SHIFT                        16
-#define CBAR_TYPE_MASK                 0x3
-#define CBAR_TYPE_S2_TRANS             (0 << CBAR_TYPE_SHIFT)
-#define CBAR_TYPE_S1_TRANS_S2_BYPASS   (1 << CBAR_TYPE_SHIFT)
-#define CBAR_TYPE_S1_TRANS_S2_FAULT    (2 << CBAR_TYPE_SHIFT)
-#define CBAR_TYPE_S1_TRANS_S2_TRANS    (3 << CBAR_TYPE_SHIFT)
-#define CBAR_IRPTNDX_SHIFT             24
-#define CBAR_IRPTNDX_MASK              0xff
-
-#define ARM_SMMU_GR1_CBFRSYNRA(n)      (0x400 + ((n) << 2))
-
-#define ARM_SMMU_GR1_CBA2R(n)          (0x800 + ((n) << 2))
-#define CBA2R_RW64_32BIT               (0 << 0)
-#define CBA2R_RW64_64BIT               (1 << 0)
-#define CBA2R_VMID_SHIFT               16
-#define CBA2R_VMID_MASK                        0xffff
-
-#define ARM_SMMU_CB_SCTLR              0x0
-#define ARM_SMMU_CB_ACTLR              0x4
-#define ARM_SMMU_CB_RESUME             0x8
-#define ARM_SMMU_CB_TTBCR2             0x10
-#define ARM_SMMU_CB_TTBR0              0x20
-#define ARM_SMMU_CB_TTBR1              0x28
-#define ARM_SMMU_CB_TTBCR              0x30
-#define ARM_SMMU_CB_CONTEXTIDR         0x34
-#define ARM_SMMU_CB_S1_MAIR0           0x38
-#define ARM_SMMU_CB_S1_MAIR1           0x3c
-#define ARM_SMMU_CB_PAR                        0x50
-#define ARM_SMMU_CB_FSR                        0x58
-#define ARM_SMMU_CB_FAR                        0x60
-#define ARM_SMMU_CB_FSYNR0             0x68
-#define ARM_SMMU_CB_S1_TLBIVA          0x600
-#define ARM_SMMU_CB_S1_TLBIASID                0x610
-#define ARM_SMMU_CB_S1_TLBIVAL         0x620
-#define ARM_SMMU_CB_S2_TLBIIPAS2       0x630
-#define ARM_SMMU_CB_S2_TLBIIPAS2L      0x638
-#define ARM_SMMU_CB_TLBSYNC            0x7f0
-#define ARM_SMMU_CB_TLBSTATUS          0x7f4
-#define ARM_SMMU_CB_ATS1PR             0x800
-#define ARM_SMMU_CB_ATSR               0x8f0
-
-#define SCTLR_S1_ASIDPNE               (1 << 12)
-#define SCTLR_CFCFG                    (1 << 7)
-#define SCTLR_CFIE                     (1 << 6)
-#define SCTLR_CFRE                     (1 << 5)
-#define SCTLR_E                                (1 << 4)
-#define SCTLR_AFE                      (1 << 2)
-#define SCTLR_TRE                      (1 << 1)
-#define SCTLR_M                                (1 << 0)
-
-#define CB_PAR_F                       (1 << 0)
-
-#define ATSR_ACTIVE                    (1 << 0)
-
-#define RESUME_RETRY                   (0 << 0)
-#define RESUME_TERMINATE               (1 << 0)
-
-#define TTBCR2_SEP_SHIFT               15
-#define TTBCR2_SEP_UPSTREAM            (0x7 << TTBCR2_SEP_SHIFT)
-#define TTBCR2_AS                      (1 << 4)
-
-#define TTBRn_ASID_SHIFT               48
-
-#define FSR_MULTI                      (1 << 31)
-#define FSR_SS                         (1 << 30)
-#define FSR_UUT                                (1 << 8)
-#define FSR_ASF                                (1 << 7)
-#define FSR_TLBLKF                     (1 << 6)
-#define FSR_TLBMCF                     (1 << 5)
-#define FSR_EF                         (1 << 4)
-#define FSR_PF                         (1 << 3)
-#define FSR_AFF                                (1 << 2)
-#define FSR_TF                         (1 << 1)
-
-#define FSR_IGN                                (FSR_AFF | FSR_ASF | \
-                                        FSR_TLBMCF | FSR_TLBLKF)
-#define FSR_FAULT                      (FSR_MULTI | FSR_SS | FSR_UUT | \
-                                        FSR_EF | FSR_PF | FSR_TF | FSR_IGN)
-
-#define FSYNR0_WNR                     (1 << 4)
-
-#endif /* _ARM_SMMU_REGS_H */
index c5c93e48b4dbdf7409ca3b8008a24a4ab3214a49..4aa414843557651673c8c1f1f971d944a37f2647 100644 (file)
 #define ARM_SMMU_MEMATTR_DEVICE_nGnRE  0x1
 #define ARM_SMMU_MEMATTR_OIWB          0xf
 
-#define Q_IDX(q, p)                    ((p) & ((1 << (q)->max_n_shift) - 1))
-#define Q_WRP(q, p)                    ((p) & (1 << (q)->max_n_shift))
-#define Q_OVERFLOW_FLAG                        (1 << 31)
-#define Q_OVF(q, p)                    ((p) & Q_OVERFLOW_FLAG)
+#define Q_IDX(llq, p)                  ((p) & ((1 << (llq)->max_n_shift) - 1))
+#define Q_WRP(llq, p)                  ((p) & (1 << (llq)->max_n_shift))
+#define Q_OVERFLOW_FLAG                        (1U << 31)
+#define Q_OVF(p)                       ((p) & Q_OVERFLOW_FLAG)
 #define Q_ENT(q, p)                    ((q)->base +                    \
-                                        Q_IDX(q, p) * (q)->ent_dwords)
+                                        Q_IDX(&((q)->llq), p) *        \
+                                        (q)->ent_dwords)
 
 #define Q_BASE_RWA                     (1UL << 62)
 #define Q_BASE_ADDR_MASK               GENMASK_ULL(51, 5)
 #define CMDQ_ERR_CERROR_ABT_IDX                2
 #define CMDQ_ERR_CERROR_ATC_INV_IDX    3
 
+#define CMDQ_PROD_OWNED_FLAG           Q_OVERFLOW_FLAG
+
+/*
+ * This is used to size the command queue and therefore must be at least
+ * BITS_PER_LONG so that the valid_map works correctly (it relies on the
+ * total number of queue entries being a multiple of BITS_PER_LONG).
+ */
+#define CMDQ_BATCH_ENTRIES             BITS_PER_LONG
+
 #define CMDQ_0_OP                      GENMASK_ULL(7, 0)
 #define CMDQ_0_SSV                     (1UL << 11)
 
 #define PRIQ_1_ADDR_MASK               GENMASK_ULL(63, 12)
 
 /* High-level queue structures */
-#define ARM_SMMU_POLL_TIMEOUT_US       100
-#define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US  1000000 /* 1s! */
-#define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT  10
+#define ARM_SMMU_POLL_TIMEOUT_US       1000000 /* 1s! */
+#define ARM_SMMU_POLL_SPIN_COUNT       10
 
 #define MSI_IOVA_BASE                  0x8000000
 #define MSI_IOVA_LENGTH                        0x100000
@@ -472,13 +481,29 @@ struct arm_smmu_cmdq_ent {
 
                #define CMDQ_OP_CMD_SYNC        0x46
                struct {
-                       u32                     msidata;
                        u64                     msiaddr;
                } sync;
        };
 };
 
+struct arm_smmu_ll_queue {
+       union {
+               u64                     val;
+               struct {
+                       u32             prod;
+                       u32             cons;
+               };
+               struct {
+                       atomic_t        prod;
+                       atomic_t        cons;
+               } atomic;
+               u8                      __pad[SMP_CACHE_BYTES];
+       } ____cacheline_aligned_in_smp;
+       u32                             max_n_shift;
+};
+
 struct arm_smmu_queue {
+       struct arm_smmu_ll_queue        llq;
        int                             irq; /* Wired interrupt */
 
        __le64                          *base;
@@ -486,17 +511,23 @@ struct arm_smmu_queue {
        u64                             q_base;
 
        size_t                          ent_dwords;
-       u32                             max_n_shift;
-       u32                             prod;
-       u32                             cons;
 
        u32 __iomem                     *prod_reg;
        u32 __iomem                     *cons_reg;
 };
 
+struct arm_smmu_queue_poll {
+       ktime_t                         timeout;
+       unsigned int                    delay;
+       unsigned int                    spin_cnt;
+       bool                            wfe;
+};
+
 struct arm_smmu_cmdq {
        struct arm_smmu_queue           q;
-       spinlock_t                      lock;
+       atomic_long_t                   *valid_map;
+       atomic_t                        owner_prod;
+       atomic_t                        lock;
 };
 
 struct arm_smmu_evtq {
@@ -576,8 +607,6 @@ struct arm_smmu_device {
 
        int                             gerr_irq;
        int                             combined_irq;
-       u32                             sync_nr;
-       u8                              prev_cmd_opcode;
 
        unsigned long                   ias; /* IPA */
        unsigned long                   oas; /* PA */
@@ -596,12 +625,6 @@ struct arm_smmu_device {
 
        struct arm_smmu_strtab_cfg      strtab_cfg;
 
-       /* Hi16xx adds an extra 32 bits of goodness to its MSI payload */
-       union {
-               u32                     sync_count;
-               u64                     padding;
-       };
-
        /* IOMMU core code handle */
        struct iommu_device             iommu;
 };
@@ -614,7 +637,7 @@ struct arm_smmu_master {
        struct list_head                domain_head;
        u32                             *sids;
        unsigned int                    num_sids;
-       bool                            ats_enabled             :1;
+       bool                            ats_enabled;
 };
 
 /* SMMU private data for an IOMMU domain */
@@ -631,6 +654,7 @@ struct arm_smmu_domain {
 
        struct io_pgtable_ops           *pgtbl_ops;
        bool                            non_strict;
+       atomic_t                        nr_ats_masters;
 
        enum arm_smmu_domain_stage      stage;
        union {
@@ -685,85 +709,97 @@ static void parse_driver_options(struct arm_smmu_device *smmu)
 }
 
 /* Low-level queue manipulation functions */
-static bool queue_full(struct arm_smmu_queue *q)
+static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
+{
+       u32 space, prod, cons;
+
+       prod = Q_IDX(q, q->prod);
+       cons = Q_IDX(q, q->cons);
+
+       if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
+               space = (1 << q->max_n_shift) - (prod - cons);
+       else
+               space = cons - prod;
+
+       return space >= n;
+}
+
+static bool queue_full(struct arm_smmu_ll_queue *q)
 {
        return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
               Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
 }
 
-static bool queue_empty(struct arm_smmu_queue *q)
+static bool queue_empty(struct arm_smmu_ll_queue *q)
 {
        return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
               Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
 }
 
-static void queue_sync_cons(struct arm_smmu_queue *q)
+static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
 {
-       q->cons = readl_relaxed(q->cons_reg);
+       return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
+               (Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
+              ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
+               (Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
 }
 
-static void queue_inc_cons(struct arm_smmu_queue *q)
+static void queue_sync_cons_out(struct arm_smmu_queue *q)
 {
-       u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
-
-       q->cons = Q_OVF(q, q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
-
        /*
         * Ensure that all CPU accesses (reads and writes) to the queue
         * are complete before we update the cons pointer.
         */
        mb();
-       writel_relaxed(q->cons, q->cons_reg);
+       writel_relaxed(q->llq.cons, q->cons_reg);
+}
+
+static void queue_inc_cons(struct arm_smmu_ll_queue *q)
+{
+       u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
+       q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
 }
 
-static int queue_sync_prod(struct arm_smmu_queue *q)
+static int queue_sync_prod_in(struct arm_smmu_queue *q)
 {
        int ret = 0;
        u32 prod = readl_relaxed(q->prod_reg);
 
-       if (Q_OVF(q, prod) != Q_OVF(q, q->prod))
+       if (Q_OVF(prod) != Q_OVF(q->llq.prod))
                ret = -EOVERFLOW;
 
-       q->prod = prod;
+       q->llq.prod = prod;
        return ret;
 }
 
-static void queue_inc_prod(struct arm_smmu_queue *q)
+static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
 {
-       u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
-
-       q->prod = Q_OVF(q, q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
-       writel(q->prod, q->prod_reg);
+       u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
+       return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
 }
 
-/*
- * Wait for the SMMU to consume items. If sync is true, wait until the queue
- * is empty. Otherwise, wait until there is at least one free slot.
- */
-static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
+static void queue_poll_init(struct arm_smmu_device *smmu,
+                           struct arm_smmu_queue_poll *qp)
 {
-       ktime_t timeout;
-       unsigned int delay = 1, spin_cnt = 0;
-
-       /* Wait longer if it's a CMD_SYNC */
-       timeout = ktime_add_us(ktime_get(), sync ?
-                                           ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
-                                           ARM_SMMU_POLL_TIMEOUT_US);
+       qp->delay = 1;
+       qp->spin_cnt = 0;
+       qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+       qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
+}
 
-       while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) {
-               if (ktime_compare(ktime_get(), timeout) > 0)
-                       return -ETIMEDOUT;
+static int queue_poll(struct arm_smmu_queue_poll *qp)
+{
+       if (ktime_compare(ktime_get(), qp->timeout) > 0)
+               return -ETIMEDOUT;
 
-               if (wfe) {
-                       wfe();
-               } else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
-                       cpu_relax();
-                       continue;
-               } else {
-                       udelay(delay);
-                       delay *= 2;
-                       spin_cnt = 0;
-               }
+       if (qp->wfe) {
+               wfe();
+       } else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
+               cpu_relax();
+       } else {
+               udelay(qp->delay);
+               qp->delay *= 2;
+               qp->spin_cnt = 0;
        }
 
        return 0;
@@ -777,16 +813,6 @@ static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
                *dst++ = cpu_to_le64(*src++);
 }
 
-static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
-{
-       if (queue_full(q))
-               return -ENOSPC;
-
-       queue_write(Q_ENT(q, q->prod), ent, q->ent_dwords);
-       queue_inc_prod(q);
-       return 0;
-}
-
 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
 {
        int i;
@@ -797,11 +823,12 @@ static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
 
 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
 {
-       if (queue_empty(q))
+       if (queue_empty(&q->llq))
                return -EAGAIN;
 
-       queue_read(ent, Q_ENT(q, q->cons), q->ent_dwords);
-       queue_inc_cons(q);
+       queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
+       queue_inc_cons(&q->llq);
+       queue_sync_cons_out(q);
        return 0;
 }
 
@@ -868,20 +895,14 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
                cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
                break;
        case CMDQ_OP_CMD_SYNC:
-               if (ent->sync.msiaddr)
+               if (ent->sync.msiaddr) {
                        cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
-               else
+                       cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
+               } else {
                        cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
+               }
                cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
                cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
-               /*
-                * Commands are written little-endian, but we want the SMMU to
-                * receive MSIData, and thus write it back to memory, in CPU
-                * byte order, so big-endian needs an extra byteswap here.
-                */
-               cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA,
-                                    cpu_to_le32(ent->sync.msidata));
-               cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
                break;
        default:
                return -ENOENT;
@@ -890,6 +911,27 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
        return 0;
 }
 
+static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
+                                        u32 prod)
+{
+       struct arm_smmu_queue *q = &smmu->cmdq.q;
+       struct arm_smmu_cmdq_ent ent = {
+               .opcode = CMDQ_OP_CMD_SYNC,
+       };
+
+       /*
+        * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
+        * payload, so the write will zero the entire command on that platform.
+        */
+       if (smmu->features & ARM_SMMU_FEAT_MSI &&
+           smmu->features & ARM_SMMU_FEAT_COHERENCY) {
+               ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
+                                  q->ent_dwords * 8;
+       }
+
+       arm_smmu_cmdq_build_cmd(cmd, &ent);
+}
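
For the MSI-based CMD_SYNC, the write-back target computed above is simply the DMA address of the queue slot that holds the CMD_SYNC itself. Spelled out with made-up numbers (the entry size below is an assumption for illustration, not taken from the driver):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t base_dma   = 0x80000000ULL;    /* q->base_dma */
        unsigned int idx    = 5;                /* Q_IDX(&q->llq, prod) */
        unsigned int dwords = 2;                /* entry size in dwords (assumed) */

        /* msiaddr = queue base + slot index * entry size in bytes */
        printf("msiaddr = 0x%" PRIx64 "\n",
               base_dma + (uint64_t)idx * dwords * 8);
        return 0;
}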
+
 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 {
        static const char *cerror_str[] = {
@@ -948,109 +990,456 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
        queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
 }
 
-static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
+/*
+ * Command queue locking.
+ * This is a form of bastardised rwlock with the following major changes:
+ *
+ * - The only LOCK routines are exclusive_trylock() and shared_lock().
+ *   Neither have barrier semantics, and instead provide only a control
+ *   dependency.
+ *
+ * - The UNLOCK routines are supplemented with shared_tryunlock(), which
+ *   fails if the caller appears to be the last lock holder (yes, this is
+ *   racy). All successful UNLOCK routines have RELEASE semantics.
+ */
+static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
 {
-       struct arm_smmu_queue *q = &smmu->cmdq.q;
-       bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+       int val;
+
+       /*
+        * We can try to avoid the cmpxchg() loop by simply incrementing the
+        * lock counter. When held in exclusive state, the lock counter is set
+        * to INT_MIN so these increments won't hurt as the value will remain
+        * negative.
+        */
+       if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
+               return;
+
+       do {
+               val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
+       } while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
+}
+
+static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
+{
+       (void)atomic_dec_return_release(&cmdq->lock);
+}
+
+static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
+{
+       if (atomic_read(&cmdq->lock) == 1)
+               return false;
+
+       arm_smmu_cmdq_shared_unlock(cmdq);
+       return true;
+}
+
+#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)           \
+({                                                                     \
+       bool __ret;                                                     \
+       local_irq_save(flags);                                          \
+       __ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);       \
+       if (!__ret)                                                     \
+               local_irq_restore(flags);                               \
+       __ret;                                                          \
+})
+
+#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)         \
+({                                                                     \
+       atomic_set_release(&cmdq->lock, 0);                             \
+       local_irq_restore(flags);                                       \
+})
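
A stand-alone sketch of this counter-based shared/exclusive lock, using C11 atomics with sequentially consistent operations for simplicity (the driver relies on carefully chosen relaxed/acquire/release variants and a control dependency instead); names are illustrative.

#include <limits.h>
#include <stdatomic.h>
#include <stdbool.h>

static atomic_int cmdq_lock;

static void shared_lock(void)
{
        int val;

        /*
         * Fast path: a non-negative counter means no exclusive holder. A
         * stray increment while the lock is held exclusively is harmless,
         * because exclusive_unlock() resets the counter to 0.
         */
        if (atomic_fetch_add(&cmdq_lock, 1) >= 0)
                return;

        do {
                do {
                        val = atomic_load(&cmdq_lock);
                } while (val < 0);              /* wait for the exclusive holder */
        } while (!atomic_compare_exchange_weak(&cmdq_lock, &val, val + 1));
}

static void shared_unlock(void)
{
        atomic_fetch_sub(&cmdq_lock, 1);
}

static bool exclusive_trylock(void)
{
        int expected = 0;

        return atomic_compare_exchange_strong(&cmdq_lock, &expected, INT_MIN);
}

static void exclusive_unlock(void)
{
        atomic_store(&cmdq_lock, 0);            /* discards stray increments */
}

int main(void)
{
        shared_lock();
        shared_unlock();
        return exclusive_trylock() ? 0 : 1;     /* succeeds: counter is back to 0 */
}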
+
+
+/*
+ * Command queue insertion.
+ * This is made fiddly by our attempts to achieve some sort of scalability
+ * since there is one queue shared amongst all of the CPUs in the system.  If
+ * you like mixed-size concurrency, dependency ordering and relaxed atomics,
+ * then you'll *love* this monstrosity.
+ *
+ * The basic idea is to split the queue up into ranges of commands that are
+ * owned by a given CPU; the owner may not have written all of the commands
+ * itself, but is responsible for advancing the hardware prod pointer when
+ * the time comes. The algorithm is roughly:
+ *
+ *     1. Allocate some space in the queue. At this point we also discover
+ *        whether the head of the queue is currently owned by another CPU,
+ *        or whether we are the owner.
+ *
+ *     2. Write our commands into our allocated slots in the queue.
+ *
+ *     3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
+ *
+ *     4. If we are an owner:
+ *             a. Wait for the previous owner to finish.
+ *             b. Mark the queue head as unowned, which tells us the range
+ *                that we are responsible for publishing.
+ *             c. Wait for all commands in our owned range to become valid.
+ *             d. Advance the hardware prod pointer.
+ *             e. Tell the next owner we've finished.
+ *
+ *     5. If we are inserting a CMD_SYNC (we may or may not have been an
+ *        owner), then we need to stick around until it has completed:
+ *             a. If we have MSIs, the SMMU can write back into the CMD_SYNC
+ *                to clear the first 4 bytes.
+ *             b. Otherwise, we spin waiting for the hardware cons pointer to
+ *                advance past our command.
+ *
+ * The devil is in the details, particularly the use of locking for handling
+ * SYNC completion and freeing up space in the queue before we think that it is
+ * full.
+ */
+static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
+                                              u32 sprod, u32 eprod, bool set)
+{
+       u32 swidx, sbidx, ewidx, ebidx;
+       struct arm_smmu_ll_queue llq = {
+               .max_n_shift    = cmdq->q.llq.max_n_shift,
+               .prod           = sprod,
+       };
+
+       ewidx = BIT_WORD(Q_IDX(&llq, eprod));
+       ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
+
+       while (llq.prod != eprod) {
+               unsigned long mask;
+               atomic_long_t *ptr;
+               u32 limit = BITS_PER_LONG;
+
+               swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
+               sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
 
-       smmu->prev_cmd_opcode = FIELD_GET(CMDQ_0_OP, cmd[0]);
+               ptr = &cmdq->valid_map[swidx];
 
-       while (queue_insert_raw(q, cmd) == -ENOSPC) {
-               if (queue_poll_cons(q, false, wfe))
-                       dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
+               if ((swidx == ewidx) && (sbidx < ebidx))
+                       limit = ebidx;
+
+               mask = GENMASK(limit - 1, sbidx);
+
+               /*
+                * The valid bit is the inverse of the wrap bit. This means
+                * that a zero-initialised queue is invalid and, after marking
+                * all entries as valid, they become invalid again when we
+                * wrap.
+                */
+               if (set) {
+                       atomic_long_xor(mask, ptr);
+               } else { /* Poll */
+                       unsigned long valid;
+
+                       valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
+                       atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
+               }
+
+               llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
        }
 }
 
-static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
-                                   struct arm_smmu_cmdq_ent *ent)
+/* Mark all entries in the range [sprod, eprod) as valid */
+static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
+                                       u32 sprod, u32 eprod)
+{
+       __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
+}
+
+/* Wait for all entries in the range [sprod, eprod) to become valid */
+static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
+                                        u32 sprod, u32 eprod)
+{
+       __arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
+}
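
The XOR/polarity trick described in the comment above can be shown in isolation. In the sketch below (illustrative names, a single word instead of a bitmap array, no atomics), a slot counts as valid when its bit equals the inverse of the wrap bit, so the map never needs clearing between passes.

#include <stdint.h>
#include <stdio.h>

#define SHIFT   3                       /* 8-slot demo queue */
#define NENTS   (1u << SHIFT)
#define IDX(p)  ((p) & (NENTS - 1))
#define WRP(p)  (((p) & NENTS) ? 1 : 0)

static uint32_t valid_map;              /* one bit per slot */

static void mark_valid(uint32_t prod)
{
        valid_map ^= 1u << IDX(prod);   /* atomic_long_xor() in the driver */
}

static int slot_valid(uint32_t prod)
{
        int bit = !!(valid_map & (1u << IDX(prod)));

        return bit == !WRP(prod);       /* expected polarity: inverse of wrap */
}

int main(void)
{
        uint32_t p;

        for (p = 0; p < 2 * NENTS; p++) {       /* two full passes */
                int before = slot_valid(p);

                mark_valid(p);
                printf("slot %u, wrap %d: before=%d after=%d\n",
                       IDX(p), WRP(p), before, slot_valid(p));
        }
        return 0;
}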
+
+/* Wait for the command queue to become non-full */
+static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
+                                            struct arm_smmu_ll_queue *llq)
 {
-       u64 cmd[CMDQ_ENT_DWORDS];
        unsigned long flags;
+       struct arm_smmu_queue_poll qp;
+       struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+       int ret = 0;
 
-       if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
-               dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
-                        ent->opcode);
-               return;
+       /*
+        * Try to update our copy of cons by grabbing exclusive cmdq access. If
+        * that fails, spin until somebody else updates it for us.
+        */
+       if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
+               WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
+               arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
+               llq->val = READ_ONCE(cmdq->q.llq.val);
+               return 0;
        }
 
-       spin_lock_irqsave(&smmu->cmdq.lock, flags);
-       arm_smmu_cmdq_insert_cmd(smmu, cmd);
-       spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
+       queue_poll_init(smmu, &qp);
+       do {
+               llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
+               if (!queue_full(llq))
+                       break;
+
+               ret = queue_poll(&qp);
+       } while (!ret);
+
+       return ret;
 }
 
 /*
- * The difference between val and sync_idx is bounded by the maximum size of
- * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
+ * Wait until the SMMU signals a CMD_SYNC completion MSI.
+ * Must be called with the cmdq lock held in some capacity.
  */
-static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
+static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
+                                         struct arm_smmu_ll_queue *llq)
 {
-       ktime_t timeout;
-       u32 val;
+       int ret = 0;
+       struct arm_smmu_queue_poll qp;
+       struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+       u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
 
-       timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
-       val = smp_cond_load_acquire(&smmu->sync_count,
-                                   (int)(VAL - sync_idx) >= 0 ||
-                                   !ktime_before(ktime_get(), timeout));
+       queue_poll_init(smmu, &qp);
 
-       return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
+       /*
+        * The MSI won't generate an event, since it's being written back
+        * into the command queue.
+        */
+       qp.wfe = false;
+       smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
+       llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
+       return ret;
 }
 
-static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
+/*
+ * Wait until the SMMU cons index passes llq->prod.
+ * Must be called with the cmdq lock held in some capacity.
+ */
+static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
+                                              struct arm_smmu_ll_queue *llq)
 {
-       u64 cmd[CMDQ_ENT_DWORDS];
-       unsigned long flags;
-       struct arm_smmu_cmdq_ent ent = {
-               .opcode = CMDQ_OP_CMD_SYNC,
-               .sync   = {
-                       .msiaddr = virt_to_phys(&smmu->sync_count),
-               },
-       };
+       struct arm_smmu_queue_poll qp;
+       struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+       u32 prod = llq->prod;
+       int ret = 0;
 
-       spin_lock_irqsave(&smmu->cmdq.lock, flags);
+       queue_poll_init(smmu, &qp);
+       llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
+       do {
+               if (queue_consumed(llq, prod))
+                       break;
 
-       /* Piggy-back on the previous command if it's a SYNC */
-       if (smmu->prev_cmd_opcode == CMDQ_OP_CMD_SYNC) {
-               ent.sync.msidata = smmu->sync_nr;
-       } else {
-               ent.sync.msidata = ++smmu->sync_nr;
-               arm_smmu_cmdq_build_cmd(cmd, &ent);
-               arm_smmu_cmdq_insert_cmd(smmu, cmd);
-       }
+               ret = queue_poll(&qp);
 
-       spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
+               /*
+                * This needs to be a readl() so that our subsequent call
+                * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
+                *
+                * Specifically, we need to ensure that we observe all
+                * shared_lock()s by other CMD_SYNCs that share our owner,
+                * so that a failing call to tryunlock() means that we're
+                * the last one out and therefore we can safely advance
+                * cmdq->q.llq.cons. Roughly speaking:
+                *
+                * CPU 0                CPU1                    CPU2 (us)
+                *
+                * if (sync)
+                *      shared_lock();
+                *
+                * dma_wmb();
+                * set_valid_map();
+                *
+                *                      if (owner) {
+                *                              poll_valid_map();
+                *                              <control dependency>
+                *                              writel(prod_reg);
+                *
+                *                                              readl(cons_reg);
+                *                                              tryunlock();
+                *
+                * Requires us to see CPU 0's shared_lock() acquisition.
+                */
+               llq->cons = readl(cmdq->q.cons_reg);
+       } while (!ret);
 
-       return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
+       return ret;
 }
 
-static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
+static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
+                                        struct arm_smmu_ll_queue *llq)
 {
-       u64 cmd[CMDQ_ENT_DWORDS];
+       if (smmu->features & ARM_SMMU_FEAT_MSI &&
+           smmu->features & ARM_SMMU_FEAT_COHERENCY)
+               return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
+
+       return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
+}
+
+static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
+                                       u32 prod, int n)
+{
+       int i;
+       struct arm_smmu_ll_queue llq = {
+               .max_n_shift    = cmdq->q.llq.max_n_shift,
+               .prod           = prod,
+       };
+
+       for (i = 0; i < n; ++i) {
+               u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
+
+               prod = queue_inc_prod_n(&llq, i);
+               queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
+       }
+}
+
+/*
+ * This is the actual insertion function, and provides the following
+ * ordering guarantees to callers:
+ *
+ * - There is a dma_wmb() before publishing any commands to the queue.
+ *   This can be relied upon to order prior writes to data structures
+ *   in memory (such as a CD or an STE) before the command.
+ *
+ * - On completion of a CMD_SYNC, there is a control dependency.
+ *   This can be relied upon to order subsequent writes to memory (e.g.
+ *   freeing an IOVA) after completion of the CMD_SYNC.
+ *
+ * - Command insertion is totally ordered, so if two CPUs each race to
+ *   insert their own list of commands then all of the commands from one
+ *   CPU will appear before any of the commands from the other CPU.
+ */
+static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
+                                      u64 *cmds, int n, bool sync)
+{
+       u64 cmd_sync[CMDQ_ENT_DWORDS];
+       u32 prod;
        unsigned long flags;
-       bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
-       struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
-       int ret;
+       bool owner;
+       struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+       struct arm_smmu_ll_queue llq = {
+               .max_n_shift = cmdq->q.llq.max_n_shift,
+       }, head = llq;
+       int ret = 0;
 
-       arm_smmu_cmdq_build_cmd(cmd, &ent);
+       /* 1. Allocate some space in the queue */
+       local_irq_save(flags);
+       llq.val = READ_ONCE(cmdq->q.llq.val);
+       do {
+               u64 old;
+
+               while (!queue_has_space(&llq, n + sync)) {
+                       local_irq_restore(flags);
+                       if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
+                               dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
+                       local_irq_save(flags);
+               }
 
-       spin_lock_irqsave(&smmu->cmdq.lock, flags);
-       arm_smmu_cmdq_insert_cmd(smmu, cmd);
-       ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
-       spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
+               head.cons = llq.cons;
+               head.prod = queue_inc_prod_n(&llq, n + sync) |
+                                            CMDQ_PROD_OWNED_FLAG;
 
+               old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
+               if (old == llq.val)
+                       break;
+
+               llq.val = old;
+       } while (1);
+       owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
+       head.prod &= ~CMDQ_PROD_OWNED_FLAG;
+       llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
+
+       /*
+        * 2. Write our commands into the queue
+        * Dependency ordering from the cmpxchg() loop above.
+        */
+       arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
+       if (sync) {
+               prod = queue_inc_prod_n(&llq, n);
+               arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
+               queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
+
+               /*
+                * In order to determine completion of our CMD_SYNC, we must
+                * ensure that the queue can't wrap twice without us noticing.
+                * We achieve that by taking the cmdq lock as shared before
+                * marking our slot as valid.
+                */
+               arm_smmu_cmdq_shared_lock(cmdq);
+       }
+
+       /* 3. Mark our slots as valid, ensuring commands are visible first */
+       dma_wmb();
+       arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
+
+       /* 4. If we are the owner, take control of the SMMU hardware */
+       if (owner) {
+               /* a. Wait for previous owner to finish */
+               atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
+
+               /* b. Stop gathering work by clearing the owned flag */
+               prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
+                                                  &cmdq->q.llq.atomic.prod);
+               prod &= ~CMDQ_PROD_OWNED_FLAG;
+
+               /*
+                * c. Wait for any gathered work to be written to the queue.
+                * Note that we read our own entries so that we have the control
+                * dependency required by (d).
+                */
+               arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
+
+               /*
+                * d. Advance the hardware prod pointer
+                * Control dependency ordering from the entries becoming valid.
+                */
+               writel_relaxed(prod, cmdq->q.prod_reg);
+
+               /*
+                * e. Tell the next owner we're done
+                * Make sure we've updated the hardware first, so that we don't
+                * race to update prod and potentially move it backwards.
+                */
+               atomic_set_release(&cmdq->owner_prod, prod);
+       }
+
+       /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
+       if (sync) {
+               llq.prod = queue_inc_prod_n(&llq, n);
+               ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
+               if (ret) {
+                       dev_err_ratelimited(smmu->dev,
+                                           "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
+                                           llq.prod,
+                                           readl_relaxed(cmdq->q.prod_reg),
+                                           readl_relaxed(cmdq->q.cons_reg));
+               }
+
+               /*
+                * Try to unlock the cmdq lock. This will fail if we're the last
+                * reader, in which case we can safely update cmdq->q.llq.cons.
+                */
+               if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
+                       WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
+                       arm_smmu_cmdq_shared_unlock(cmdq);
+               }
+       }
+
+       local_irq_restore(flags);
        return ret;
 }
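
Step 1 of the algorithm (space allocation plus owner election) is the hardest part to see from the code alone. The sketch below models it in user space with a single 64-bit prod/cons word and a made-up OWNED flag bit; queue-full handling, the wrap bit and the overflow bit are all omitted, so this only illustrates the cmpxchg() loop.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define OWNED_FLAG      (UINT64_C(1) << 31)

/* Low 32 bits: prod (with the OWNED flag), high 32 bits: cons. */
static _Atomic uint64_t llq_val;

static uint32_t reserve(unsigned int n, bool *owner)
{
        uint64_t old = atomic_load(&llq_val);
        uint64_t next;

        do {
                uint64_t prod = ((old & 0x7fffffffu) + n) | OWNED_FLAG;

                next = (old & 0xffffffff00000000ull) | prod;
        } while (!atomic_compare_exchange_weak(&llq_val, &old, next));

        *owner = !(old & OWNED_FLAG);   /* flag was clear: we own this batch */
        return (uint32_t)(old & 0x7fffffffu);
}

int main(void)
{
        bool owner;
        uint32_t prod;

        prod = reserve(4, &owner);
        printf("first:  prod=%u owner=%d\n", prod, owner);      /* 0, 1 */
        prod = reserve(2, &owner);
        printf("second: prod=%u owner=%d\n", prod, owner);      /* 4, 0 */
        return 0;
}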
 
-static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
+static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
+                                  struct arm_smmu_cmdq_ent *ent)
 {
-       int ret;
-       bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
-                  (smmu->features & ARM_SMMU_FEAT_COHERENCY);
+       u64 cmd[CMDQ_ENT_DWORDS];
 
-       ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu)
-                 : __arm_smmu_cmdq_issue_sync(smmu);
-       if (ret)
-               dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
-       return ret;
+       if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
+               dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
+                        ent->opcode);
+               return -EINVAL;
+       }
+
+       return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
+}
+
+static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
+{
+       return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
 }
 
 /* Context descriptor manipulation functions */
@@ -1305,6 +1694,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
        int i;
        struct arm_smmu_device *smmu = dev;
        struct arm_smmu_queue *q = &smmu->evtq.q;
+       struct arm_smmu_ll_queue *llq = &q->llq;
        u64 evt[EVTQ_ENT_DWORDS];
 
        do {
@@ -1322,12 +1712,13 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
                 * Not much we can do on overflow, so scream and pretend we're
                 * trying harder.
                 */
-               if (queue_sync_prod(q) == -EOVERFLOW)
+               if (queue_sync_prod_in(q) == -EOVERFLOW)
                        dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
-       } while (!queue_empty(q));
+       } while (!queue_empty(llq));
 
        /* Sync our overflow flag, as we believe we're up to speed */
-       q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
+       llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
+                   Q_IDX(llq, llq->cons);
        return IRQ_HANDLED;
 }
 
@@ -1373,19 +1764,21 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
 {
        struct arm_smmu_device *smmu = dev;
        struct arm_smmu_queue *q = &smmu->priq.q;
+       struct arm_smmu_ll_queue *llq = &q->llq;
        u64 evt[PRIQ_ENT_DWORDS];
 
        do {
                while (!queue_remove_raw(q, evt))
                        arm_smmu_handle_ppr(smmu, evt);
 
-               if (queue_sync_prod(q) == -EOVERFLOW)
+               if (queue_sync_prod_in(q) == -EOVERFLOW)
                        dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
-       } while (!queue_empty(q));
+       } while (!queue_empty(llq));
 
        /* Sync our overflow flag, as we believe we're up to speed */
-       q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
-       writel(q->cons, q->cons_reg);
+       llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
+                     Q_IDX(llq, llq->cons);
+       queue_sync_cons_out(q);
        return IRQ_HANDLED;
 }
 
@@ -1534,6 +1927,23 @@ static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
        if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
                return 0;
 
+       /*
+        * Ensure that we've completed prior invalidation of the main TLBs
+        * before we read 'nr_ats_masters' in case of a concurrent call to
+        * arm_smmu_enable_ats():
+        *
+        *      // unmap()                      // arm_smmu_enable_ats()
+        *      TLBI+SYNC                       atomic_inc(&nr_ats_masters);
+        *      smp_mb();                       [...]
+        *      atomic_read(&nr_ats_masters);   pci_enable_ats() // writel()
+        *
+        * Ensures that we always see the incremented 'nr_ats_masters' count if
+        * ATS was enabled at the PCI device before completion of the TLBI.
+        */
+       smp_mb();
+       if (!atomic_read(&smmu_domain->nr_ats_masters))
+               return 0;
+
        arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
 
        spin_lock_irqsave(&smmu_domain->devices_lock, flags);
@@ -1545,13 +1955,6 @@ static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
 }
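
The smp_mb() above pairs with the ordering on the arm_smmu_enable_ats() path in the classic store-buffering shape. A loose C11 analogy is sketched below (made-up flags; the two functions run on different CPUs in practice): with a full fence on each side, the outcome where the unmap path misses the new ATS master *and* the attach path misses the completed invalidation is forbidden, which is the property the comment relies on.

#include <stdatomic.h>
#include <stdio.h>

static atomic_int tlbi_done;            /* unmap side: TLBI + SYNC completed */
static atomic_int nr_ats_masters;       /* attach side: ATS being enabled */

static int unmap_side(void)
{
        atomic_store_explicit(&tlbi_done, 1, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);      /* the smp_mb() above */
        return atomic_load_explicit(&nr_ats_masters, memory_order_relaxed);
}

static int attach_side(void)
{
        atomic_fetch_add_explicit(&nr_ats_masters, 1, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);      /* ordering before the enable */
        return atomic_load_explicit(&tlbi_done, memory_order_relaxed);
}

int main(void)
{
        printf("attach sees tlbi_done=%d, unmap sees masters=%d\n",
               attach_side(), unmap_side());
        return 0;
}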
 
 /* IO_PGTABLE API */
-static void arm_smmu_tlb_sync(void *cookie)
-{
-       struct arm_smmu_domain *smmu_domain = cookie;
-
-       arm_smmu_cmdq_issue_sync(smmu_domain->smmu);
-}
-
 static void arm_smmu_tlb_inv_context(void *cookie)
 {
        struct arm_smmu_domain *smmu_domain = cookie;
@@ -1570,25 +1973,32 @@ static void arm_smmu_tlb_inv_context(void *cookie)
        /*
         * NOTE: when io-pgtable is in non-strict mode, we may get here with
         * PTEs previously cleared by unmaps on the current CPU not yet visible
-        * to the SMMU. We are relying on the DSB implicit in queue_inc_prod()
-        * to guarantee those are observed before the TLBI. Do be careful, 007.
+        * to the SMMU. We are relying on the dma_wmb() implicit during cmd
+        * insertion to guarantee those are observed before the TLBI. Do be
+        * careful, 007.
         */
        arm_smmu_cmdq_issue_cmd(smmu, &cmd);
        arm_smmu_cmdq_issue_sync(smmu);
+       arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
 }
 
-static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
-                                         size_t granule, bool leaf, void *cookie)
+static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
+                                  size_t granule, bool leaf,
+                                  struct arm_smmu_domain *smmu_domain)
 {
-       struct arm_smmu_domain *smmu_domain = cookie;
+       u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
        struct arm_smmu_device *smmu = smmu_domain->smmu;
+       unsigned long start = iova, end = iova + size;
+       int i = 0;
        struct arm_smmu_cmdq_ent cmd = {
                .tlbi = {
                        .leaf   = leaf,
-                       .addr   = iova,
                },
        };
 
+       if (!size)
+               return;
+
        if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
                cmd.opcode      = CMDQ_OP_TLBI_NH_VA;
                cmd.tlbi.asid   = smmu_domain->s1_cfg.cd.asid;
@@ -1597,16 +2007,54 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
                cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
        }
 
-       do {
-               arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-               cmd.tlbi.addr += granule;
-       } while (size -= granule);
+       while (iova < end) {
+               if (i == CMDQ_BATCH_ENTRIES) {
+                       arm_smmu_cmdq_issue_cmdlist(smmu, cmds, i, false);
+                       i = 0;
+               }
+
+               cmd.tlbi.addr = iova;
+               arm_smmu_cmdq_build_cmd(&cmds[i * CMDQ_ENT_DWORDS], &cmd);
+               iova += granule;
+               i++;
+       }
+
+       arm_smmu_cmdq_issue_cmdlist(smmu, cmds, i, true);
+
+       /*
+        * Unfortunately, this can't be leaf-only since we may have
+        * zapped an entire table.
+        */
+       arm_smmu_atc_inv_domain(smmu_domain, 0, start, size);
+}
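
The loop above chops the range into granule-sized invalidation commands and flushes them in batches, requesting a CMD_SYNC only with the final batch. A compact stand-alone model (emit() standing in for arm_smmu_cmdq_issue_cmdlist(), BATCH for the batch limit):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define BATCH   64                      /* plays the role of CMDQ_BATCH_ENTRIES */

static void emit(size_t n, bool sync)
{
        printf("issue %zu cmds%s\n", n, sync ? " + sync" : "");
}

static void inv_range(unsigned long iova, size_t size, size_t granule)
{
        unsigned long end = iova + size;
        size_t i = 0;

        if (!size)
                return;

        while (iova < end) {
                if (i == BATCH) {
                        emit(i, false); /* flush a full batch, no sync yet */
                        i = 0;
                }
                /* build_cmd(&cmds[i], iova) would go here */
                iova += granule;
                i++;
        }
        emit(i, true);                  /* final partial batch plus CMD_SYNC */
}

int main(void)
{
        inv_range(0x10000, 130 * 0x1000, 0x1000);       /* 130 pages: 64 + 64 + 2 */
        return 0;
}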
+
+static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
+                                        unsigned long iova, size_t granule,
+                                        void *cookie)
+{
+       struct arm_smmu_domain *smmu_domain = cookie;
+       struct iommu_domain *domain = &smmu_domain->domain;
+
+       iommu_iotlb_gather_add_page(domain, gather, iova, granule);
 }
 
-static const struct iommu_gather_ops arm_smmu_gather_ops = {
+static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
+                                 size_t granule, void *cookie)
+{
+       arm_smmu_tlb_inv_range(iova, size, granule, false, cookie);
+}
+
+static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
+                                 size_t granule, void *cookie)
+{
+       arm_smmu_tlb_inv_range(iova, size, granule, true, cookie);
+}
+
+static const struct iommu_flush_ops arm_smmu_flush_ops = {
        .tlb_flush_all  = arm_smmu_tlb_inv_context,
-       .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
-       .tlb_sync       = arm_smmu_tlb_sync,
+       .tlb_flush_walk = arm_smmu_tlb_inv_walk,
+       .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
+       .tlb_add_page   = arm_smmu_tlb_inv_page_nosync,
 };
 
 /* IOMMU API */
@@ -1796,7 +2244,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
                .ias            = ias,
                .oas            = oas,
                .coherent_walk  = smmu->features & ARM_SMMU_FEAT_COHERENCY,
-               .tlb            = &arm_smmu_gather_ops,
+               .tlb            = &arm_smmu_flush_ops,
                .iommu_dev      = smmu->dev,
        };
 
@@ -1863,44 +2311,65 @@ static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
        }
 }
 
-static int arm_smmu_enable_ats(struct arm_smmu_master *master)
+#ifdef CONFIG_PCI_ATS
+static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
 {
-       int ret;
-       size_t stu;
        struct pci_dev *pdev;
        struct arm_smmu_device *smmu = master->smmu;
        struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
 
        if (!(smmu->features & ARM_SMMU_FEAT_ATS) || !dev_is_pci(master->dev) ||
            !(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS) || pci_ats_disabled())
-               return -ENXIO;
+               return false;
 
        pdev = to_pci_dev(master->dev);
-       if (pdev->untrusted)
-               return -EPERM;
+       return !pdev->untrusted && pdev->ats_cap;
+}
+#else
+static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
+{
+       return false;
+}
+#endif
+
+static void arm_smmu_enable_ats(struct arm_smmu_master *master)
+{
+       size_t stu;
+       struct pci_dev *pdev;
+       struct arm_smmu_device *smmu = master->smmu;
+       struct arm_smmu_domain *smmu_domain = master->domain;
+
+       /* Don't enable ATS at the endpoint if it's not enabled in the STE */
+       if (!master->ats_enabled)
+               return;
 
        /* Smallest Translation Unit: log2 of the smallest supported granule */
        stu = __ffs(smmu->pgsize_bitmap);
+       pdev = to_pci_dev(master->dev);
 
-       ret = pci_enable_ats(pdev, stu);
-       if (ret)
-               return ret;
-
-       master->ats_enabled = true;
-       return 0;
+       atomic_inc(&smmu_domain->nr_ats_masters);
+       arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0);
+       if (pci_enable_ats(pdev, stu))
+               dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
 }
 
 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
 {
        struct arm_smmu_cmdq_ent cmd;
+       struct arm_smmu_domain *smmu_domain = master->domain;
 
-       if (!master->ats_enabled || !dev_is_pci(master->dev))
+       if (!master->ats_enabled)
                return;
 
+       pci_disable_ats(to_pci_dev(master->dev));
+       /*
+        * Ensure ATS is disabled at the endpoint before we issue the
+        * ATC invalidation via the SMMU.
+        */
+       wmb();
        arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
        arm_smmu_atc_inv_master(master, &cmd);
-       pci_disable_ats(to_pci_dev(master->dev));
-       master->ats_enabled = false;
+       atomic_dec(&smmu_domain->nr_ats_masters);
 }
 
 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
@@ -1911,14 +2380,15 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master)
        if (!smmu_domain)
                return;
 
+       arm_smmu_disable_ats(master);
+
        spin_lock_irqsave(&smmu_domain->devices_lock, flags);
        list_del(&master->domain_head);
        spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
 
        master->domain = NULL;
+       master->ats_enabled = false;
        arm_smmu_install_ste_for_dev(master);
-
-       arm_smmu_disable_ats(master);
 }
 
 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
@@ -1958,17 +2428,20 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 
        master->domain = smmu_domain;
 
-       spin_lock_irqsave(&smmu_domain->devices_lock, flags);
-       list_add(&master->domain_head, &smmu_domain->devices);
-       spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
-
        if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
-               arm_smmu_enable_ats(master);
+               master->ats_enabled = arm_smmu_ats_supported(master);
 
        if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
                arm_smmu_write_ctx_desc(smmu, &smmu_domain->s1_cfg);
 
        arm_smmu_install_ste_for_dev(master);
+
+       spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+       list_add(&master->domain_head, &smmu_domain->devices);
+       spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+
+       arm_smmu_enable_ats(master);
+
 out_unlock:
        mutex_unlock(&smmu_domain->init_mutex);
        return ret;
@@ -1985,21 +2458,16 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
        return ops->map(ops, iova, paddr, size, prot);
 }
 
-static size_t
-arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
+static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
+                            size_t size, struct iommu_iotlb_gather *gather)
 {
-       int ret;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
 
        if (!ops)
                return 0;
 
-       ret = ops->unmap(ops, iova, size);
-       if (ret && arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size))
-               return 0;
-
-       return ret;
+       return ops->unmap(ops, iova, size, gather);
 }
 
 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
@@ -2010,12 +2478,13 @@ static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
                arm_smmu_tlb_inv_context(smmu_domain);
 }
 
-static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
+static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
+                               struct iommu_iotlb_gather *gather)
 {
-       struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 
-       if (smmu)
-               arm_smmu_cmdq_issue_sync(smmu);
+       arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start,
+                              gather->pgsize, true, smmu_domain);
 }
 
 static phys_addr_t
@@ -2286,13 +2755,13 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
        size_t qsz;
 
        do {
-               qsz = ((1 << q->max_n_shift) * dwords) << 3;
+               qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
                q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
                                              GFP_KERNEL);
                if (q->base || qsz < PAGE_SIZE)
                        break;
 
-               q->max_n_shift--;
+               q->llq.max_n_shift--;
        } while (1);
 
        if (!q->base) {
@@ -2304,7 +2773,7 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
 
        if (!WARN_ON(q->base_dma & (qsz - 1))) {
                dev_info(smmu->dev, "allocated %u entries for %s\n",
-                        1 << q->max_n_shift, name);
+                        1 << q->llq.max_n_shift, name);
        }
 
        q->prod_reg     = arm_smmu_page1_fixup(prod_off, smmu);
@@ -2313,24 +2782,55 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
 
        q->q_base  = Q_BASE_RWA;
        q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
-       q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->max_n_shift);
+       q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
 
-       q->prod = q->cons = 0;
+       q->llq.prod = q->llq.cons = 0;
        return 0;
 }
 
+static void arm_smmu_cmdq_free_bitmap(void *data)
+{
+       unsigned long *bitmap = data;
+       bitmap_free(bitmap);
+}
+
+static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
+{
+       int ret = 0;
+       struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+       unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
+       atomic_long_t *bitmap;
+
+       atomic_set(&cmdq->owner_prod, 0);
+       atomic_set(&cmdq->lock, 0);
+
+       bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
+       if (!bitmap) {
+               dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
+               ret = -ENOMEM;
+       } else {
+               cmdq->valid_map = bitmap;
+               devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
+       }
+
+       return ret;
+}
+
 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
 {
        int ret;
 
        /* cmdq */
-       spin_lock_init(&smmu->cmdq.lock);
        ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
                                      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
                                      "cmdq");
        if (ret)
                return ret;
 
+       ret = arm_smmu_cmdq_init(smmu);
+       if (ret)
+               return ret;
+
        /* evtq */
        ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
                                      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
@@ -2708,8 +3208,8 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 
        /* Command queue */
        writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
-       writel_relaxed(smmu->cmdq.q.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
-       writel_relaxed(smmu->cmdq.q.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
+       writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
+       writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
 
        enables = CR0_CMDQEN;
        ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
@@ -2736,9 +3236,9 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 
        /* Event queue */
        writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
-       writel_relaxed(smmu->evtq.q.prod,
+       writel_relaxed(smmu->evtq.q.llq.prod,
                       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
-       writel_relaxed(smmu->evtq.q.cons,
+       writel_relaxed(smmu->evtq.q.llq.cons,
                       arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
 
        enables |= CR0_EVTQEN;
@@ -2753,9 +3253,9 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
        if (smmu->features & ARM_SMMU_FEAT_PRI) {
                writeq_relaxed(smmu->priq.q.q_base,
                               smmu->base + ARM_SMMU_PRIQ_BASE);
-               writel_relaxed(smmu->priq.q.prod,
+               writel_relaxed(smmu->priq.q.llq.prod,
                               arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
-               writel_relaxed(smmu->priq.q.cons,
+               writel_relaxed(smmu->priq.q.llq.cons,
                               arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
 
                enables |= CR0_PRIQEN;
@@ -2909,18 +3409,24 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
        }
 
        /* Queue sizes, capped to ensure natural alignment */
-       smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
-                                        FIELD_GET(IDR1_CMDQS, reg));
-       if (!smmu->cmdq.q.max_n_shift) {
-               /* Odd alignment restrictions on the base, so ignore for now */
-               dev_err(smmu->dev, "unit-length command queue not supported\n");
+       smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
+                                            FIELD_GET(IDR1_CMDQS, reg));
+       if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
+               /*
+                * We don't support splitting up batches, so one batch of
+                * commands plus an extra sync needs to fit inside the command
+                * queue. There's also no way we can handle the weird alignment
+                * restrictions on the base pointer for a unit-length queue.
+                */
+               dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
+                       CMDQ_BATCH_ENTRIES);
                return -ENXIO;
        }
 
-       smmu->evtq.q.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
-                                        FIELD_GET(IDR1_EVTQS, reg));
-       smmu->priq.q.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
-                                        FIELD_GET(IDR1_PRIQS, reg));
+       smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
+                                            FIELD_GET(IDR1_EVTQS, reg));
+       smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
+                                            FIELD_GET(IDR1_PRIQS, reg));
 
        /* SID/SSID sizes */
        smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
index 64977c131ee62aeeaea4865989ba8b378c924aeb..5b93c79371e98327ca20c9e7fb7a48db42f77a79 100644 (file)
 
 #include <linux/acpi.h>
 #include <linux/acpi_iort.h>
-#include <linux/atomic.h>
+#include <linux/bitfield.h>
 #include <linux/delay.h>
 #include <linux/dma-iommu.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
-#include <linux/io-64-nonatomic-hi-lo.h>
-#include <linux/io-pgtable.h>
-#include <linux/iommu.h>
 #include <linux/iopoll.h>
 #include <linux/init.h>
 #include <linux/moduleparam.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
-#include <linux/spinlock.h>
 
 #include <linux/amba/bus.h>
 #include <linux/fsl/mc.h>
 
-#include "arm-smmu-regs.h"
+#include "arm-smmu.h"
 
 /*
  * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
  */
 #define QCOM_DUMMY_VAL -1
 
-#define ARM_MMU500_ACTLR_CPRE          (1 << 1)
-
-#define ARM_MMU500_ACR_CACHE_LOCK      (1 << 26)
-#define ARM_MMU500_ACR_S2CRB_TLBEN     (1 << 10)
-#define ARM_MMU500_ACR_SMTNMB_TLBEN    (1 << 8)
-
 #define TLB_LOOP_TIMEOUT               1000000 /* 1s! */
 #define TLB_SPIN_COUNT                 10
 
-/* Maximum number of context banks per SMMU */
-#define ARM_SMMU_MAX_CBS               128
-
-/* SMMU global address space */
-#define ARM_SMMU_GR0(smmu)             ((smmu)->base)
-#define ARM_SMMU_GR1(smmu)             ((smmu)->base + (1 << (smmu)->pgshift))
-
-/*
- * SMMU global address space with conditional offset to access secure
- * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
- * nsGFSYNR0: 0x450)
- */
-#define ARM_SMMU_GR0_NS(smmu)                                          \
-       ((smmu)->base +                                                 \
-               ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)       \
-                       ? 0x400 : 0))
-
-/*
- * Some 64-bit registers only make sense to write atomically, but in such
- * cases all the data relevant to AArch32 formats lies within the lower word,
- * therefore this actually makes more sense than it might first appear.
- */
-#ifdef CONFIG_64BIT
-#define smmu_write_atomic_lq           writeq_relaxed
-#else
-#define smmu_write_atomic_lq           writel_relaxed
-#endif
-
-/* Translation context bank */
-#define ARM_SMMU_CB(smmu, n)   ((smmu)->cb_base + ((n) << (smmu)->pgshift))
-
 #define MSI_IOVA_BASE                  0x8000000
 #define MSI_IOVA_LENGTH                        0x100000
 
@@ -113,19 +72,6 @@ module_param(disable_bypass, bool, S_IRUGO);
 MODULE_PARM_DESC(disable_bypass,
        "Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
 
-enum arm_smmu_arch_version {
-       ARM_SMMU_V1,
-       ARM_SMMU_V1_64K,
-       ARM_SMMU_V2,
-};
-
-enum arm_smmu_implementation {
-       GENERIC_SMMU,
-       ARM_MMU500,
-       CAVIUM_SMMUV2,
-       QCOM_SMMUV2,
-};
-
 struct arm_smmu_s2cr {
        struct iommu_group              *group;
        int                             count;
@@ -163,117 +109,8 @@ struct arm_smmu_master_cfg {
 #define for_each_cfg_sme(fw, i, idx) \
        for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
 
-struct arm_smmu_device {
-       struct device                   *dev;
-
-       void __iomem                    *base;
-       void __iomem                    *cb_base;
-       unsigned long                   pgshift;
-
-#define ARM_SMMU_FEAT_COHERENT_WALK    (1 << 0)
-#define ARM_SMMU_FEAT_STREAM_MATCH     (1 << 1)
-#define ARM_SMMU_FEAT_TRANS_S1         (1 << 2)
-#define ARM_SMMU_FEAT_TRANS_S2         (1 << 3)
-#define ARM_SMMU_FEAT_TRANS_NESTED     (1 << 4)
-#define ARM_SMMU_FEAT_TRANS_OPS                (1 << 5)
-#define ARM_SMMU_FEAT_VMID16           (1 << 6)
-#define ARM_SMMU_FEAT_FMT_AARCH64_4K   (1 << 7)
-#define ARM_SMMU_FEAT_FMT_AARCH64_16K  (1 << 8)
-#define ARM_SMMU_FEAT_FMT_AARCH64_64K  (1 << 9)
-#define ARM_SMMU_FEAT_FMT_AARCH32_L    (1 << 10)
-#define ARM_SMMU_FEAT_FMT_AARCH32_S    (1 << 11)
-#define ARM_SMMU_FEAT_EXIDS            (1 << 12)
-       u32                             features;
-
-#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
-       u32                             options;
-       enum arm_smmu_arch_version      version;
-       enum arm_smmu_implementation    model;
-
-       u32                             num_context_banks;
-       u32                             num_s2_context_banks;
-       DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
-       struct arm_smmu_cb              *cbs;
-       atomic_t                        irptndx;
-
-       u32                             num_mapping_groups;
-       u16                             streamid_mask;
-       u16                             smr_mask_mask;
-       struct arm_smmu_smr             *smrs;
-       struct arm_smmu_s2cr            *s2crs;
-       struct mutex                    stream_map_mutex;
-
-       unsigned long                   va_size;
-       unsigned long                   ipa_size;
-       unsigned long                   pa_size;
-       unsigned long                   pgsize_bitmap;
-
-       u32                             num_global_irqs;
-       u32                             num_context_irqs;
-       unsigned int                    *irqs;
-       struct clk_bulk_data            *clks;
-       int                             num_clks;
-
-       u32                             cavium_id_base; /* Specific to Cavium */
-
-       spinlock_t                      global_sync_lock;
-
-       /* IOMMU core code handle */
-       struct iommu_device             iommu;
-};
-
-enum arm_smmu_context_fmt {
-       ARM_SMMU_CTX_FMT_NONE,
-       ARM_SMMU_CTX_FMT_AARCH64,
-       ARM_SMMU_CTX_FMT_AARCH32_L,
-       ARM_SMMU_CTX_FMT_AARCH32_S,
-};
-
-struct arm_smmu_cfg {
-       u8                              cbndx;
-       u8                              irptndx;
-       union {
-               u16                     asid;
-               u16                     vmid;
-       };
-       u32                             cbar;
-       enum arm_smmu_context_fmt       fmt;
-};
-#define INVALID_IRPTNDX                        0xff
-
-enum arm_smmu_domain_stage {
-       ARM_SMMU_DOMAIN_S1 = 0,
-       ARM_SMMU_DOMAIN_S2,
-       ARM_SMMU_DOMAIN_NESTED,
-       ARM_SMMU_DOMAIN_BYPASS,
-};
-
-struct arm_smmu_domain {
-       struct arm_smmu_device          *smmu;
-       struct io_pgtable_ops           *pgtbl_ops;
-       const struct iommu_gather_ops   *tlb_ops;
-       struct arm_smmu_cfg             cfg;
-       enum arm_smmu_domain_stage      stage;
-       bool                            non_strict;
-       struct mutex                    init_mutex; /* Protects smmu pointer */
-       spinlock_t                      cb_lock; /* Serialises ATS1* ops and TLB syncs */
-       struct iommu_domain             domain;
-};
-
-struct arm_smmu_option_prop {
-       u32 opt;
-       const char *prop;
-};
-
-static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
-
 static bool using_legacy_binding, using_generic_binding;
 
-static struct arm_smmu_option_prop arm_smmu_options[] = {
-       { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
-       { 0, NULL},
-};
-
 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
 {
        if (pm_runtime_enabled(smmu->dev))
@@ -293,20 +130,6 @@ static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
        return container_of(dom, struct arm_smmu_domain, domain);
 }
 
-static void parse_driver_options(struct arm_smmu_device *smmu)
-{
-       int i = 0;
-
-       do {
-               if (of_property_read_bool(smmu->dev->of_node,
-                                               arm_smmu_options[i].prop)) {
-                       smmu->options |= arm_smmu_options[i].opt;
-                       dev_notice(smmu->dev, "option %s\n",
-                               arm_smmu_options[i].prop);
-               }
-       } while (arm_smmu_options[++i].opt);
-}
-
 static struct device_node *dev_get_dev_node(struct device *dev)
 {
        if (dev_is_pci(dev)) {
@@ -415,15 +238,17 @@ static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
 }
 
 /* Wait for any pending TLB invalidations to complete */
-static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
-                               void __iomem *sync, void __iomem *status)
+static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
+                               int sync, int status)
 {
        unsigned int spin_cnt, delay;
+       u32 reg;
 
-       writel_relaxed(QCOM_DUMMY_VAL, sync);
+       arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
        for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
                for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
-                       if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
+                       reg = arm_smmu_readl(smmu, page, status);
+                       if (!(reg & sTLBGSTATUS_GSACTIVE))
                                return;
                        cpu_relax();
                }
@@ -435,12 +260,11 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
 
 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
 {
-       void __iomem *base = ARM_SMMU_GR0(smmu);
        unsigned long flags;
 
        spin_lock_irqsave(&smmu->global_sync_lock, flags);
-       __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
-                           base + ARM_SMMU_GR0_sTLBGSTATUS);
+       __arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
+                           ARM_SMMU_GR0_sTLBGSTATUS);
        spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
 }
 
@@ -448,12 +272,11 @@ static void arm_smmu_tlb_sync_context(void *cookie)
 {
        struct arm_smmu_domain *smmu_domain = cookie;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
-       void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
        unsigned long flags;
 
        spin_lock_irqsave(&smmu_domain->cb_lock, flags);
-       __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
-                           base + ARM_SMMU_CB_TLBSTATUS);
+       __arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
+                           ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
        spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
 }
 
@@ -467,14 +290,13 @@ static void arm_smmu_tlb_sync_vmid(void *cookie)
 static void arm_smmu_tlb_inv_context_s1(void *cookie)
 {
        struct arm_smmu_domain *smmu_domain = cookie;
-       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
-       void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
-
        /*
-        * NOTE: this is not a relaxed write; it needs to guarantee that PTEs
-        * cleared by the current CPU are visible to the SMMU before the TLBI.
+        * The TLBI write may be relaxed, so ensure that PTEs cleared by the
+        * current CPU are visible beforehand.
         */
-       writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
+       wmb();
+       arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
+                         ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
        arm_smmu_tlb_sync_context(cookie);
 }
 
@@ -482,87 +304,143 @@ static void arm_smmu_tlb_inv_context_s2(void *cookie)
 {
        struct arm_smmu_domain *smmu_domain = cookie;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
-       void __iomem *base = ARM_SMMU_GR0(smmu);
 
-       /* NOTE: see above */
-       writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
+       /* See above */
+       wmb();
+       arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
        arm_smmu_tlb_sync_global(smmu);
 }
 
-static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
-                                         size_t granule, bool leaf, void *cookie)
+static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
+                                     size_t granule, bool leaf, void *cookie)
 {
        struct arm_smmu_domain *smmu_domain = cookie;
+       struct arm_smmu_device *smmu = smmu_domain->smmu;
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
-       bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
-       void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
+       int reg, idx = cfg->cbndx;
 
-       if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+       if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
                wmb();
 
-       if (stage1) {
-               reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
-
-               if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
-                       iova &= ~12UL;
-                       iova |= cfg->asid;
-                       do {
-                               writel_relaxed(iova, reg);
-                               iova += granule;
-                       } while (size -= granule);
-               } else {
-                       iova >>= 12;
-                       iova |= (u64)cfg->asid << 48;
-                       do {
-                               writeq_relaxed(iova, reg);
-                               iova += granule >> 12;
-                       } while (size -= granule);
-               }
+       reg = leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
+
+       if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
+               iova = (iova >> 12) << 12;
+               iova |= cfg->asid;
+               do {
+                       arm_smmu_cb_write(smmu, idx, reg, iova);
+                       iova += granule;
+               } while (size -= granule);
        } else {
-               reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
-                             ARM_SMMU_CB_S2_TLBIIPAS2;
                iova >>= 12;
+               iova |= (u64)cfg->asid << 48;
                do {
-                       smmu_write_atomic_lq(iova, reg);
+                       arm_smmu_cb_writeq(smmu, idx, reg, iova);
                        iova += granule >> 12;
                } while (size -= granule);
        }
 }
 
+static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
+                                     size_t granule, bool leaf, void *cookie)
+{
+       struct arm_smmu_domain *smmu_domain = cookie;
+       struct arm_smmu_device *smmu = smmu_domain->smmu;
+       int reg, idx = smmu_domain->cfg.cbndx;
+
+       if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+               wmb();
+
+       reg = leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2;
+       iova >>= 12;
+       do {
+               if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
+                       arm_smmu_cb_writeq(smmu, idx, reg, iova);
+               else
+                       arm_smmu_cb_write(smmu, idx, reg, iova);
+               iova += granule >> 12;
+       } while (size -= granule);
+}
+
 /*
  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
  * almost negligible, but the benefit of getting the first one in as far ahead
  * of the sync as possible is significant, hence we don't just make this a
- * no-op and set .tlb_sync to arm_smmu_inv_context_s2() as you might think.
+ * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
  */
 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
                                         size_t granule, bool leaf, void *cookie)
 {
        struct arm_smmu_domain *smmu_domain = cookie;
-       void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
+       struct arm_smmu_device *smmu = smmu_domain->smmu;
 
-       if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+       if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
                wmb();
 
-       writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
+       arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
+}
+
+static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
+                                 size_t granule, void *cookie)
+{
+       struct arm_smmu_domain *smmu_domain = cookie;
+       const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
+
+       ops->tlb_inv_range(iova, size, granule, false, cookie);
+       ops->tlb_sync(cookie);
+}
+
+static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
+                                 size_t granule, void *cookie)
+{
+       struct arm_smmu_domain *smmu_domain = cookie;
+       const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
+
+       ops->tlb_inv_range(iova, size, granule, true, cookie);
+       ops->tlb_sync(cookie);
+}
+
+static void arm_smmu_tlb_add_page(struct iommu_iotlb_gather *gather,
+                                 unsigned long iova, size_t granule,
+                                 void *cookie)
+{
+       struct arm_smmu_domain *smmu_domain = cookie;
+       const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
+
+       ops->tlb_inv_range(iova, granule, granule, true, cookie);
 }
 
-static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
-       .tlb_flush_all  = arm_smmu_tlb_inv_context_s1,
-       .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
-       .tlb_sync       = arm_smmu_tlb_sync_context,
+static const struct arm_smmu_flush_ops arm_smmu_s1_tlb_ops = {
+       .tlb = {
+               .tlb_flush_all  = arm_smmu_tlb_inv_context_s1,
+               .tlb_flush_walk = arm_smmu_tlb_inv_walk,
+               .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
+               .tlb_add_page   = arm_smmu_tlb_add_page,
+       },
+       .tlb_inv_range          = arm_smmu_tlb_inv_range_s1,
+       .tlb_sync               = arm_smmu_tlb_sync_context,
 };
 
-static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
-       .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
-       .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
-       .tlb_sync       = arm_smmu_tlb_sync_context,
+static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
+       .tlb = {
+               .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
+               .tlb_flush_walk = arm_smmu_tlb_inv_walk,
+               .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
+               .tlb_add_page   = arm_smmu_tlb_add_page,
+       },
+       .tlb_inv_range          = arm_smmu_tlb_inv_range_s2,
+       .tlb_sync               = arm_smmu_tlb_sync_context,
 };
 
-static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
-       .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
-       .tlb_add_flush  = arm_smmu_tlb_inv_vmid_nosync,
-       .tlb_sync       = arm_smmu_tlb_sync_vmid,
+static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
+       .tlb = {
+               .tlb_flush_all  = arm_smmu_tlb_inv_context_s2,
+               .tlb_flush_walk = arm_smmu_tlb_inv_walk,
+               .tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
+               .tlb_add_page   = arm_smmu_tlb_add_page,
+       },
+       .tlb_inv_range          = arm_smmu_tlb_inv_vmid_nosync,
+       .tlb_sync               = arm_smmu_tlb_sync_vmid,
 };
 
 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
@@ -571,26 +449,22 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
        unsigned long iova;
        struct iommu_domain *domain = dev;
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
        struct arm_smmu_device *smmu = smmu_domain->smmu;
-       void __iomem *gr1_base = ARM_SMMU_GR1(smmu);
-       void __iomem *cb_base;
-
-       cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
-       fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
+       int idx = smmu_domain->cfg.cbndx;
 
+       fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
        if (!(fsr & FSR_FAULT))
                return IRQ_NONE;
 
-       fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
-       iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
-       cbfrsynra = readl_relaxed(gr1_base + ARM_SMMU_GR1_CBFRSYNRA(cfg->cbndx));
+       fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
+       iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
+       cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
 
        dev_err_ratelimited(smmu->dev,
        "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
-                           fsr, iova, fsynr, cbfrsynra, cfg->cbndx);
+                           fsr, iova, fsynr, cbfrsynra, idx);
 
-       writel(fsr, cb_base + ARM_SMMU_CB_FSR);
+       arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
        return IRQ_HANDLED;
 }
 
@@ -598,12 +472,11 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
 {
        u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
        struct arm_smmu_device *smmu = dev;
-       void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
 
-       gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
-       gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
-       gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
-       gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
+       gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
+       gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
+       gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
+       gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
 
        if (!gfsr)
                return IRQ_NONE;
@@ -614,7 +487,7 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
                "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
                gfsr, gfsynr0, gfsynr1, gfsynr2);
 
-       writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
+       arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
        return IRQ_HANDLED;
 }
 
@@ -627,16 +500,16 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
 
        cb->cfg = cfg;
 
-       /* TTBCR */
+       /* TCR */
        if (stage1) {
                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
                        cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
                } else {
                        cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
                        cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
-                       cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
+                       cb->tcr[1] |= FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM);
                        if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
-                               cb->tcr[1] |= TTBCR2_AS;
+                               cb->tcr[1] |= TCR2_AS;
                }
        } else {
                cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
@@ -649,9 +522,9 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
                        cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
                } else {
                        cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
-                       cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
+                       cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
                        cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
-                       cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
+                       cb->ttbr[1] |= FIELD_PREP(TTBRn_ASID, cfg->asid);
                }
        } else {
                cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
@@ -675,74 +548,71 @@ static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
        bool stage1;
        struct arm_smmu_cb *cb = &smmu->cbs[idx];
        struct arm_smmu_cfg *cfg = cb->cfg;
-       void __iomem *cb_base, *gr1_base;
-
-       cb_base = ARM_SMMU_CB(smmu, idx);
 
        /* Unassigned context banks only need disabling */
        if (!cfg) {
-               writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
+               arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
                return;
        }
 
-       gr1_base = ARM_SMMU_GR1(smmu);
        stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
 
        /* CBA2R */
        if (smmu->version > ARM_SMMU_V1) {
                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
-                       reg = CBA2R_RW64_64BIT;
+                       reg = CBA2R_VA64;
                else
-                       reg = CBA2R_RW64_32BIT;
+                       reg = 0;
                /* 16-bit VMIDs live in CBA2R */
                if (smmu->features & ARM_SMMU_FEAT_VMID16)
-                       reg |= cfg->vmid << CBA2R_VMID_SHIFT;
+                       reg |= FIELD_PREP(CBA2R_VMID16, cfg->vmid);
 
-               writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
+               arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
        }
 
        /* CBAR */
-       reg = cfg->cbar;
+       reg = FIELD_PREP(CBAR_TYPE, cfg->cbar);
        if (smmu->version < ARM_SMMU_V2)
-               reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
+               reg |= FIELD_PREP(CBAR_IRPTNDX, cfg->irptndx);
 
        /*
         * Use the weakest shareability/memory types, so they are
         * overridden by the ttbcr/pte.
         */
        if (stage1) {
-               reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
-                       (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
+               reg |= FIELD_PREP(CBAR_S1_BPSHCFG, CBAR_S1_BPSHCFG_NSH) |
+                       FIELD_PREP(CBAR_S1_MEMATTR, CBAR_S1_MEMATTR_WB);
        } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
                /* 8-bit VMIDs live in CBAR */
-               reg |= cfg->vmid << CBAR_VMID_SHIFT;
+               reg |= FIELD_PREP(CBAR_VMID, cfg->vmid);
        }
-       writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
+       arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
 
        /*
-        * TTBCR
+        * TCR
         * We must write this before the TTBRs, since it determines the
         * access behaviour of some fields (in particular, ASID[15:8]).
         */
        if (stage1 && smmu->version > ARM_SMMU_V1)
-               writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
-       writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
+               arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
+       arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
 
        /* TTBRs */
        if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
-               writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
-               writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
-               writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
+               arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
+               arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
+               arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
        } else {
-               writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
+               arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
                if (stage1)
-                       writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
+                       arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
+                                          cb->ttbr[1]);
        }
 
        /* MAIRs (stage-1 only) */
        if (stage1) {
-               writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
-               writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
+               arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
+               arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
        }
 
        /* SCTLR */
@@ -752,7 +622,7 @@ static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
        if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
                reg |= SCTLR_E;
 
-       writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
+       arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
 }
 
 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
@@ -842,7 +712,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
                        ias = min(ias, 32UL);
                        oas = min(oas, 32UL);
                }
-               smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
+               smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
                break;
        case ARM_SMMU_DOMAIN_NESTED:
                /*
@@ -862,9 +732,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
                        oas = min(oas, 40UL);
                }
                if (smmu->version == ARM_SMMU_V2)
-                       smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
+                       smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
                else
-                       smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
+                       smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
                break;
        default:
                ret = -EINVAL;
@@ -884,23 +754,29 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
        }
 
        if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
-               cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
+               cfg->vmid = cfg->cbndx + 1;
        else
-               cfg->asid = cfg->cbndx + smmu->cavium_id_base;
+               cfg->asid = cfg->cbndx;
+
+       smmu_domain->smmu = smmu;
+       if (smmu->impl && smmu->impl->init_context) {
+               ret = smmu->impl->init_context(smmu_domain);
+               if (ret)
+                       goto out_unlock;
+       }
 
        pgtbl_cfg = (struct io_pgtable_cfg) {
                .pgsize_bitmap  = smmu->pgsize_bitmap,
                .ias            = ias,
                .oas            = oas,
                .coherent_walk  = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
-               .tlb            = smmu_domain->tlb_ops,
+               .tlb            = &smmu_domain->flush_ops->tlb,
                .iommu_dev      = smmu->dev,
        };
 
        if (smmu_domain->non_strict)
                pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
 
-       smmu_domain->smmu = smmu;
        pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
        if (!pgtbl_ops) {
                ret = -ENOMEM;
@@ -1019,24 +895,24 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
 {
        struct arm_smmu_smr *smr = smmu->smrs + idx;
-       u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
+       u32 reg = FIELD_PREP(SMR_ID, smr->id) | FIELD_PREP(SMR_MASK, smr->mask);
 
        if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
                reg |= SMR_VALID;
-       writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
+       arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
 }
 
 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
 {
        struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
-       u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
-                 (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
-                 (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
+       u32 reg = FIELD_PREP(S2CR_TYPE, s2cr->type) |
+                 FIELD_PREP(S2CR_CBNDX, s2cr->cbndx) |
+                 FIELD_PREP(S2CR_PRIVCFG, s2cr->privcfg);
 
        if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
            smmu->smrs[idx].valid)
                reg |= S2CR_EXIDVALID;
-       writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
+       arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
 }
 
 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
@@ -1052,7 +928,6 @@ static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
  */
 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
 {
-       void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
        u32 smr;
 
        if (!smmu->smrs)
@@ -1063,15 +938,15 @@ static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
         * bits are set, so check each one separately. We can reject
         * masters later if they try to claim IDs outside these masks.
         */
-       smr = smmu->streamid_mask << SMR_ID_SHIFT;
-       writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
-       smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
-       smmu->streamid_mask = smr >> SMR_ID_SHIFT;
+       smr = FIELD_PREP(SMR_ID, smmu->streamid_mask);
+       arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr);
+       smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0));
+       smmu->streamid_mask = FIELD_GET(SMR_ID, smr);
 
-       smr = smmu->streamid_mask << SMR_MASK_SHIFT;
-       writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
-       smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
-       smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
+       smr = FIELD_PREP(SMR_MASK, smmu->streamid_mask);
+       arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr);
+       smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0));
+       smmu->smr_mask_mask = FIELD_GET(SMR_MASK, smr);
 }
 
 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
@@ -1140,8 +1015,8 @@ static int arm_smmu_master_alloc_smes(struct device *dev)
        mutex_lock(&smmu->stream_map_mutex);
        /* Figure out a viable stream map entry allocation */
        for_each_cfg_sme(fwspec, i, idx) {
-               u16 sid = fwspec->ids[i];
-               u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
+               u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
+               u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);
 
                if (idx != INVALID_SMENDX) {
                        ret = -EEXIST;
@@ -1301,7 +1176,7 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
 }
 
 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
-                            size_t size)
+                            size_t size, struct iommu_iotlb_gather *gather)
 {
        struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
        struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
@@ -1311,7 +1186,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
                return 0;
 
        arm_smmu_rpm_get(smmu);
-       ret = ops->unmap(ops, iova, size);
+       ret = ops->unmap(ops, iova, size, gather);
        arm_smmu_rpm_put(smmu);
 
        return ret;
@@ -1322,21 +1197,22 @@ static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct arm_smmu_device *smmu = smmu_domain->smmu;
 
-       if (smmu_domain->tlb_ops) {
+       if (smmu_domain->flush_ops) {
                arm_smmu_rpm_get(smmu);
-               smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
+               smmu_domain->flush_ops->tlb.tlb_flush_all(smmu_domain);
                arm_smmu_rpm_put(smmu);
        }
 }
 
-static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
+static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
+                               struct iommu_iotlb_gather *gather)
 {
        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
        struct arm_smmu_device *smmu = smmu_domain->smmu;
 
-       if (smmu_domain->tlb_ops) {
+       if (smmu_domain->flush_ops) {
                arm_smmu_rpm_get(smmu);
-               smmu_domain->tlb_ops->tlb_sync(smmu_domain);
+               smmu_domain->flush_ops->tlb_sync(smmu_domain);
                arm_smmu_rpm_put(smmu);
        }
 }
@@ -1349,28 +1225,25 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
        struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
        struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
        struct device *dev = smmu->dev;
-       void __iomem *cb_base;
+       void __iomem *reg;
        u32 tmp;
        u64 phys;
        unsigned long va, flags;
-       int ret;
+       int ret, idx = cfg->cbndx;
 
        ret = arm_smmu_rpm_get(smmu);
        if (ret < 0)
                return 0;
 
-       cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
-
        spin_lock_irqsave(&smmu_domain->cb_lock, flags);
-       /* ATS1 registers can only be written atomically */
        va = iova & ~0xfffUL;
-       if (smmu->version == ARM_SMMU_V2)
-               smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
-       else /* Register is only 32-bit in v1 */
-               writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
+       if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
+               arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
+       else
+               arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
 
-       if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
-                                     !(tmp & ATSR_ACTIVE), 5, 50)) {
+       reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
+       if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ATSR_ACTIVE), 5, 50)) {
                spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
                dev_err(dev,
                        "iova to phys timed out on %pad. Falling back to software table walk.\n",
@@ -1378,7 +1251,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
                return ops->iova_to_phys(ops, iova);
        }
 
-       phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
+       phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
        spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
        if (phys & CB_PAR_F) {
                dev_err(dev, "translation fault!\n");
@@ -1466,8 +1339,8 @@ static int arm_smmu_add_device(struct device *dev)
 
        ret = -EINVAL;
        for (i = 0; i < fwspec->num_ids; i++) {
-               u16 sid = fwspec->ids[i];
-               u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
+               u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]);
+               u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]);
 
                if (sid & ~smmu->streamid_mask) {
                        dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
@@ -1648,12 +1521,12 @@ static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
        u32 mask, fwid = 0;
 
        if (args->args_count > 0)
-               fwid |= (u16)args->args[0];
+               fwid |= FIELD_PREP(SMR_ID, args->args[0]);
 
        if (args->args_count > 1)
-               fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
+               fwid |= FIELD_PREP(SMR_MASK, args->args[1]);
        else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
-               fwid |= (u16)mask << SMR_MASK_SHIFT;
+               fwid |= FIELD_PREP(SMR_MASK, mask);
 
        return iommu_fwspec_add_ids(dev, &fwid, 1);
 }
@@ -1706,13 +1579,12 @@ static struct iommu_ops arm_smmu_ops = {
 
 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
 {
-       void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
        int i;
-       u32 reg, major;
+       u32 reg;
 
        /* clear global FSR */
-       reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
-       writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
+       reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
+       arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
 
        /*
         * Reset stream mapping groups: Initial values mark all SMRn as
@@ -1721,47 +1593,17 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
        for (i = 0; i < smmu->num_mapping_groups; ++i)
                arm_smmu_write_sme(smmu, i);
 
-       if (smmu->model == ARM_MMU500) {
-               /*
-                * Before clearing ARM_MMU500_ACTLR_CPRE, need to
-                * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
-                * bit is only present in MMU-500r2 onwards.
-                */
-               reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
-               major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
-               reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
-               if (major >= 2)
-                       reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
-               /*
-                * Allow unmatched Stream IDs to allocate bypass
-                * TLB entries for reduced latency.
-                */
-               reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
-               writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
-       }
-
        /* Make sure all context banks are disabled and clear CB_FSR  */
        for (i = 0; i < smmu->num_context_banks; ++i) {
-               void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
-
                arm_smmu_write_context_bank(smmu, i);
-               writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
-               /*
-                * Disable MMU-500's not-particularly-beneficial next-page
-                * prefetcher for the sake of errata #841119 and #826419.
-                */
-               if (smmu->model == ARM_MMU500) {
-                       reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
-                       reg &= ~ARM_MMU500_ACTLR_CPRE;
-                       writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
-               }
+               arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, FSR_FAULT);
        }
 
        /* Invalidate the TLB, just in case */
-       writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLH);
-       writel_relaxed(QCOM_DUMMY_VAL, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
+       arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
+       arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
 
-       reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
+       reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
 
        /* Enable fault reporting */
        reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
@@ -1780,7 +1622,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
        reg &= ~sCR0_FB;
 
        /* Don't upgrade barriers */
-       reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
+       reg &= ~(sCR0_BSU);
 
        if (smmu->features & ARM_SMMU_FEAT_VMID16)
                reg |= sCR0_VMID16EN;
@@ -1788,9 +1630,12 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
        if (smmu->features & ARM_SMMU_FEAT_EXIDS)
                reg |= sCR0_EXIDENABLE;
 
+       if (smmu->impl && smmu->impl->reset)
+               smmu->impl->reset(smmu);
+
        /* Push the button */
        arm_smmu_tlb_sync_global(smmu);
-       writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
+       arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
 }
 
 static int arm_smmu_id_size_to_bits(int size)
@@ -1814,8 +1659,7 @@ static int arm_smmu_id_size_to_bits(int size)
 
 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
 {
-       unsigned long size;
-       void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
+       unsigned int size;
        u32 id;
        bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
        int i;
@@ -1825,7 +1669,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
                        smmu->version == ARM_SMMU_V2 ? 2 : 1);
 
        /* ID0 */
-       id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
+       id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
 
        /* Restrict available stages based on module parameter */
        if (force_stage == 1)
@@ -1879,12 +1723,12 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
                smmu->features |= ARM_SMMU_FEAT_EXIDS;
                size = 1 << 16;
        } else {
-               size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
+               size = 1 << FIELD_GET(ID0_NUMSIDB, id);
        }
        smmu->streamid_mask = size - 1;
        if (id & ID0_SMS) {
                smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
-               size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
+               size = FIELD_GET(ID0_NUMSMRG, id);
                if (size == 0) {
                        dev_err(smmu->dev,
                                "stream-matching supported, but no SMRs present!\n");
@@ -1898,7 +1742,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
                        return -ENOMEM;
 
                dev_notice(smmu->dev,
-                          "\tstream matching with %lu register groups", size);
+                          "\tstream matching with %u register groups", size);
        }
        /* s2cr->type == 0 means translation, so initialise explicitly */
        smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
@@ -1919,49 +1763,38 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
        }
 
        /* ID1 */
-       id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
+       id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
        smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
 
        /* Check for size mismatch of SMMU address space from mapped region */
-       size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
-       size <<= smmu->pgshift;
-       if (smmu->cb_base != gr0_base + size)
+       size = 1 << (FIELD_GET(ID1_NUMPAGENDXB, id) + 1);
+       if (smmu->numpage != 2 * size << smmu->pgshift)
                dev_warn(smmu->dev,
-                       "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
-                       size * 2, (smmu->cb_base - gr0_base) * 2);
+                       "SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
+                       2 * size << smmu->pgshift, smmu->numpage);
+       /* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
+       smmu->numpage = size;
 
-       smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
-       smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
+       smmu->num_s2_context_banks = FIELD_GET(ID1_NUMS2CB, id);
+       smmu->num_context_banks = FIELD_GET(ID1_NUMCB, id);
        if (smmu->num_s2_context_banks > smmu->num_context_banks) {
                dev_err(smmu->dev, "impossible number of S2 context banks!\n");
                return -ENODEV;
        }
        dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
                   smmu->num_context_banks, smmu->num_s2_context_banks);
-       /*
-        * Cavium CN88xx erratum #27704.
-        * Ensure ASID and VMID allocation is unique across all SMMUs in
-        * the system.
-        */
-       if (smmu->model == CAVIUM_SMMUV2) {
-               smmu->cavium_id_base =
-                       atomic_add_return(smmu->num_context_banks,
-                                         &cavium_smmu_context_count);
-               smmu->cavium_id_base -= smmu->num_context_banks;
-               dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
-       }
        smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
                                 sizeof(*smmu->cbs), GFP_KERNEL);
        if (!smmu->cbs)
                return -ENOMEM;
 
        /* ID2 */
-       id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
-       size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
+       id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
+       size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_IAS, id));
        smmu->ipa_size = size;
 
        /* The output mask is also applied for bypass */
-       size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
+       size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_OAS, id));
        smmu->pa_size = size;
 
        if (id & ID2_VMID16)
@@ -1981,7 +1814,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
                if (smmu->version == ARM_SMMU_V1_64K)
                        smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
        } else {
-               size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
+               size = FIELD_GET(ID2_UBS, id);
                smmu->va_size = arm_smmu_id_size_to_bits(size);
                if (id & ID2_PTFS_4K)
                        smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
@@ -2018,6 +1851,9 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
                dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
                           smmu->ipa_size, smmu->pa_size);
 
+       if (smmu->impl && smmu->impl->cfg_probe)
+               return smmu->impl->cfg_probe(smmu);
+
        return 0;
 }
 
@@ -2130,8 +1966,6 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev,
        smmu->version = data->version;
        smmu->model = data->model;
 
-       parse_driver_options(smmu);
-
        legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
        if (legacy_binding && !using_generic_binding) {
                if (!using_legacy_binding)
@@ -2194,12 +2028,20 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
        if (err)
                return err;
 
+       smmu = arm_smmu_impl_init(smmu);
+       if (IS_ERR(smmu))
+               return PTR_ERR(smmu);
+
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        ioaddr = res->start;
        smmu->base = devm_ioremap_resource(dev, res);
        if (IS_ERR(smmu->base))
                return PTR_ERR(smmu->base);
-       smmu->cb_base = smmu->base + resource_size(res) / 2;
+       /*
+        * The resource size should effectively match the value of SMMU_TOP;
+        * stash that temporarily until we know PAGESIZE to validate it with.
+        */
+       smmu->numpage = resource_size(res);
 
        num_irqs = 0;
        while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
@@ -2339,7 +2181,7 @@ static void arm_smmu_device_shutdown(struct platform_device *pdev)
 
        arm_smmu_rpm_get(smmu);
        /* Turn the thing off */
-       writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
+       arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, sCR0_CLIENTPD);
        arm_smmu_rpm_put(smmu);
 
        if (pm_runtime_enabled(smmu->dev))
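
A minimal sketch, not part of the series: the hunks above consistently replace open-coded void __iomem * arithmetic with the page-indexed accessors (arm_smmu_gr0_*, arm_smmu_gr1_*, arm_smmu_cb_*) defined in the new arm-smmu.h that follows. Assuming those definitions, a context-bank access lands at the offset computed below; the helper and its name are invented for illustration.

        /*
         * Illustrative only: byte offset from smmu->base targeted by
         * arm_smmu_cb_write(smmu, idx, off, val). GR0 is page 0, GR1 is
         * page 1, and context bank idx is page (smmu->numpage + idx).
         */
        static unsigned long arm_smmu_cb_mmio_offset(struct arm_smmu_device *smmu,
                                                     int idx, int off)
        {
                return ((unsigned long)ARM_SMMU_CB(smmu, idx) << smmu->pgshift) + off;
        }
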
diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h
new file mode 100644 (file)
index 0000000..b19b6ca
--- /dev/null
@@ -0,0 +1,402 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * IOMMU API for ARM architected SMMU implementations.
+ *
+ * Copyright (C) 2013 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#ifndef _ARM_SMMU_H
+#define _ARM_SMMU_H
+
+#include <linux/atomic.h>
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
+#include <linux/io-pgtable.h>
+#include <linux/iommu.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+/* Configuration registers */
+#define ARM_SMMU_GR0_sCR0              0x0
+#define sCR0_VMID16EN                  BIT(31)
+#define sCR0_BSU                       GENMASK(15, 14)
+#define sCR0_FB                                BIT(13)
+#define sCR0_PTM                       BIT(12)
+#define sCR0_VMIDPNE                   BIT(11)
+#define sCR0_USFCFG                    BIT(10)
+#define sCR0_GCFGFIE                   BIT(5)
+#define sCR0_GCFGFRE                   BIT(4)
+#define sCR0_EXIDENABLE                        BIT(3)
+#define sCR0_GFIE                      BIT(2)
+#define sCR0_GFRE                      BIT(1)
+#define sCR0_CLIENTPD                  BIT(0)
+
+/* Auxiliary Configuration register */
+#define ARM_SMMU_GR0_sACR              0x10
+
+/* Identification registers */
+#define ARM_SMMU_GR0_ID0               0x20
+#define ID0_S1TS                       BIT(30)
+#define ID0_S2TS                       BIT(29)
+#define ID0_NTS                                BIT(28)
+#define ID0_SMS                                BIT(27)
+#define ID0_ATOSNS                     BIT(26)
+#define ID0_PTFS_NO_AARCH32            BIT(25)
+#define ID0_PTFS_NO_AARCH32S           BIT(24)
+#define ID0_NUMIRPT                    GENMASK(23, 16)
+#define ID0_CTTW                       BIT(14)
+#define ID0_NUMSIDB                    GENMASK(12, 9)
+#define ID0_EXIDS                      BIT(8)
+#define ID0_NUMSMRG                    GENMASK(7, 0)
+
+#define ARM_SMMU_GR0_ID1               0x24
+#define ID1_PAGESIZE                   BIT(31)
+#define ID1_NUMPAGENDXB                        GENMASK(30, 28)
+#define ID1_NUMS2CB                    GENMASK(23, 16)
+#define ID1_NUMCB                      GENMASK(7, 0)
+
+#define ARM_SMMU_GR0_ID2               0x28
+#define ID2_VMID16                     BIT(15)
+#define ID2_PTFS_64K                   BIT(14)
+#define ID2_PTFS_16K                   BIT(13)
+#define ID2_PTFS_4K                    BIT(12)
+#define ID2_UBS                                GENMASK(11, 8)
+#define ID2_OAS                                GENMASK(7, 4)
+#define ID2_IAS                                GENMASK(3, 0)
+
+#define ARM_SMMU_GR0_ID3               0x2c
+#define ARM_SMMU_GR0_ID4               0x30
+#define ARM_SMMU_GR0_ID5               0x34
+#define ARM_SMMU_GR0_ID6               0x38
+
+#define ARM_SMMU_GR0_ID7               0x3c
+#define ID7_MAJOR                      GENMASK(7, 4)
+#define ID7_MINOR                      GENMASK(3, 0)
+
+#define ARM_SMMU_GR0_sGFSR             0x48
+#define ARM_SMMU_GR0_sGFSYNR0          0x50
+#define ARM_SMMU_GR0_sGFSYNR1          0x54
+#define ARM_SMMU_GR0_sGFSYNR2          0x58
+
+/* Global TLB invalidation */
+#define ARM_SMMU_GR0_TLBIVMID          0x64
+#define ARM_SMMU_GR0_TLBIALLNSNH       0x68
+#define ARM_SMMU_GR0_TLBIALLH          0x6c
+#define ARM_SMMU_GR0_sTLBGSYNC         0x70
+
+#define ARM_SMMU_GR0_sTLBGSTATUS       0x74
+#define sTLBGSTATUS_GSACTIVE           BIT(0)
+
+/* Stream mapping registers */
+#define ARM_SMMU_GR0_SMR(n)            (0x800 + ((n) << 2))
+#define SMR_VALID                      BIT(31)
+#define SMR_MASK                       GENMASK(31, 16)
+#define SMR_ID                         GENMASK(15, 0)
+
+#define ARM_SMMU_GR0_S2CR(n)           (0xc00 + ((n) << 2))
+#define S2CR_PRIVCFG                   GENMASK(25, 24)
+enum arm_smmu_s2cr_privcfg {
+       S2CR_PRIVCFG_DEFAULT,
+       S2CR_PRIVCFG_DIPAN,
+       S2CR_PRIVCFG_UNPRIV,
+       S2CR_PRIVCFG_PRIV,
+};
+#define S2CR_TYPE                      GENMASK(17, 16)
+enum arm_smmu_s2cr_type {
+       S2CR_TYPE_TRANS,
+       S2CR_TYPE_BYPASS,
+       S2CR_TYPE_FAULT,
+};
+#define S2CR_EXIDVALID                 BIT(10)
+#define S2CR_CBNDX                     GENMASK(7, 0)
+
+/* Context bank attribute registers */
+#define ARM_SMMU_GR1_CBAR(n)           (0x0 + ((n) << 2))
+#define CBAR_IRPTNDX                   GENMASK(31, 24)
+#define CBAR_TYPE                      GENMASK(17, 16)
+enum arm_smmu_cbar_type {
+       CBAR_TYPE_S2_TRANS,
+       CBAR_TYPE_S1_TRANS_S2_BYPASS,
+       CBAR_TYPE_S1_TRANS_S2_FAULT,
+       CBAR_TYPE_S1_TRANS_S2_TRANS,
+};
+#define CBAR_S1_MEMATTR                        GENMASK(15, 12)
+#define CBAR_S1_MEMATTR_WB             0xf
+#define CBAR_S1_BPSHCFG                        GENMASK(9, 8)
+#define CBAR_S1_BPSHCFG_NSH            3
+#define CBAR_VMID                      GENMASK(7, 0)
+
+#define ARM_SMMU_GR1_CBFRSYNRA(n)      (0x400 + ((n) << 2))
+
+#define ARM_SMMU_GR1_CBA2R(n)          (0x800 + ((n) << 2))
+#define CBA2R_VMID16                   GENMASK(31, 16)
+#define CBA2R_VA64                     BIT(0)
+
+#define ARM_SMMU_CB_SCTLR              0x0
+#define SCTLR_S1_ASIDPNE               BIT(12)
+#define SCTLR_CFCFG                    BIT(7)
+#define SCTLR_CFIE                     BIT(6)
+#define SCTLR_CFRE                     BIT(5)
+#define SCTLR_E                                BIT(4)
+#define SCTLR_AFE                      BIT(2)
+#define SCTLR_TRE                      BIT(1)
+#define SCTLR_M                                BIT(0)
+
+#define ARM_SMMU_CB_ACTLR              0x4
+
+#define ARM_SMMU_CB_RESUME             0x8
+#define RESUME_TERMINATE               BIT(0)
+
+#define ARM_SMMU_CB_TCR2               0x10
+#define TCR2_SEP                       GENMASK(17, 15)
+#define TCR2_SEP_UPSTREAM              0x7
+#define TCR2_AS                                BIT(4)
+
+#define ARM_SMMU_CB_TTBR0              0x20
+#define ARM_SMMU_CB_TTBR1              0x28
+#define TTBRn_ASID                     GENMASK_ULL(63, 48)
+
+#define ARM_SMMU_CB_TCR                        0x30
+#define ARM_SMMU_CB_CONTEXTIDR         0x34
+#define ARM_SMMU_CB_S1_MAIR0           0x38
+#define ARM_SMMU_CB_S1_MAIR1           0x3c
+
+#define ARM_SMMU_CB_PAR                        0x50
+#define CB_PAR_F                       BIT(0)
+
+#define ARM_SMMU_CB_FSR                        0x58
+#define FSR_MULTI                      BIT(31)
+#define FSR_SS                         BIT(30)
+#define FSR_UUT                                BIT(8)
+#define FSR_ASF                                BIT(7)
+#define FSR_TLBLKF                     BIT(6)
+#define FSR_TLBMCF                     BIT(5)
+#define FSR_EF                         BIT(4)
+#define FSR_PF                         BIT(3)
+#define FSR_AFF                                BIT(2)
+#define FSR_TF                         BIT(1)
+
+#define FSR_IGN                                (FSR_AFF | FSR_ASF | \
+                                        FSR_TLBMCF | FSR_TLBLKF)
+#define FSR_FAULT                      (FSR_MULTI | FSR_SS | FSR_UUT | \
+                                        FSR_EF | FSR_PF | FSR_TF | FSR_IGN)
+
+#define ARM_SMMU_CB_FAR                        0x60
+
+#define ARM_SMMU_CB_FSYNR0             0x68
+#define FSYNR0_WNR                     BIT(4)
+
+#define ARM_SMMU_CB_S1_TLBIVA          0x600
+#define ARM_SMMU_CB_S1_TLBIASID                0x610
+#define ARM_SMMU_CB_S1_TLBIVAL         0x620
+#define ARM_SMMU_CB_S2_TLBIIPAS2       0x630
+#define ARM_SMMU_CB_S2_TLBIIPAS2L      0x638
+#define ARM_SMMU_CB_TLBSYNC            0x7f0
+#define ARM_SMMU_CB_TLBSTATUS          0x7f4
+#define ARM_SMMU_CB_ATS1PR             0x800
+
+#define ARM_SMMU_CB_ATSR               0x8f0
+#define ATSR_ACTIVE                    BIT(0)
+
+
+/* Maximum number of context banks per SMMU */
+#define ARM_SMMU_MAX_CBS               128
+
+
+/* Shared driver definitions */
+enum arm_smmu_arch_version {
+       ARM_SMMU_V1,
+       ARM_SMMU_V1_64K,
+       ARM_SMMU_V2,
+};
+
+enum arm_smmu_implementation {
+       GENERIC_SMMU,
+       ARM_MMU500,
+       CAVIUM_SMMUV2,
+       QCOM_SMMUV2,
+};
+
+struct arm_smmu_device {
+       struct device                   *dev;
+
+       void __iomem                    *base;
+       unsigned int                    numpage;
+       unsigned int                    pgshift;
+
+#define ARM_SMMU_FEAT_COHERENT_WALK    (1 << 0)
+#define ARM_SMMU_FEAT_STREAM_MATCH     (1 << 1)
+#define ARM_SMMU_FEAT_TRANS_S1         (1 << 2)
+#define ARM_SMMU_FEAT_TRANS_S2         (1 << 3)
+#define ARM_SMMU_FEAT_TRANS_NESTED     (1 << 4)
+#define ARM_SMMU_FEAT_TRANS_OPS                (1 << 5)
+#define ARM_SMMU_FEAT_VMID16           (1 << 6)
+#define ARM_SMMU_FEAT_FMT_AARCH64_4K   (1 << 7)
+#define ARM_SMMU_FEAT_FMT_AARCH64_16K  (1 << 8)
+#define ARM_SMMU_FEAT_FMT_AARCH64_64K  (1 << 9)
+#define ARM_SMMU_FEAT_FMT_AARCH32_L    (1 << 10)
+#define ARM_SMMU_FEAT_FMT_AARCH32_S    (1 << 11)
+#define ARM_SMMU_FEAT_EXIDS            (1 << 12)
+       u32                             features;
+
+       enum arm_smmu_arch_version      version;
+       enum arm_smmu_implementation    model;
+       const struct arm_smmu_impl      *impl;
+
+       u32                             num_context_banks;
+       u32                             num_s2_context_banks;
+       DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
+       struct arm_smmu_cb              *cbs;
+       atomic_t                        irptndx;
+
+       u32                             num_mapping_groups;
+       u16                             streamid_mask;
+       u16                             smr_mask_mask;
+       struct arm_smmu_smr             *smrs;
+       struct arm_smmu_s2cr            *s2crs;
+       struct mutex                    stream_map_mutex;
+
+       unsigned long                   va_size;
+       unsigned long                   ipa_size;
+       unsigned long                   pa_size;
+       unsigned long                   pgsize_bitmap;
+
+       u32                             num_global_irqs;
+       u32                             num_context_irqs;
+       unsigned int                    *irqs;
+       struct clk_bulk_data            *clks;
+       int                             num_clks;
+
+       spinlock_t                      global_sync_lock;
+
+       /* IOMMU core code handle */
+       struct iommu_device             iommu;
+};
+
+enum arm_smmu_context_fmt {
+       ARM_SMMU_CTX_FMT_NONE,
+       ARM_SMMU_CTX_FMT_AARCH64,
+       ARM_SMMU_CTX_FMT_AARCH32_L,
+       ARM_SMMU_CTX_FMT_AARCH32_S,
+};
+
+struct arm_smmu_cfg {
+       u8                              cbndx;
+       u8                              irptndx;
+       union {
+               u16                     asid;
+               u16                     vmid;
+       };
+       enum arm_smmu_cbar_type         cbar;
+       enum arm_smmu_context_fmt       fmt;
+};
+#define INVALID_IRPTNDX                        0xff
+
+enum arm_smmu_domain_stage {
+       ARM_SMMU_DOMAIN_S1 = 0,
+       ARM_SMMU_DOMAIN_S2,
+       ARM_SMMU_DOMAIN_NESTED,
+       ARM_SMMU_DOMAIN_BYPASS,
+};
+
+struct arm_smmu_flush_ops {
+       struct iommu_flush_ops          tlb;
+       void (*tlb_inv_range)(unsigned long iova, size_t size, size_t granule,
+                             bool leaf, void *cookie);
+       void (*tlb_sync)(void *cookie);
+};
+
+struct arm_smmu_domain {
+       struct arm_smmu_device          *smmu;
+       struct io_pgtable_ops           *pgtbl_ops;
+       const struct arm_smmu_flush_ops *flush_ops;
+       struct arm_smmu_cfg             cfg;
+       enum arm_smmu_domain_stage      stage;
+       bool                            non_strict;
+       struct mutex                    init_mutex; /* Protects smmu pointer */
+       spinlock_t                      cb_lock; /* Serialises ATS1* ops and TLB syncs */
+       struct iommu_domain             domain;
+};
+
+
+/* Implementation details, yay! */
+struct arm_smmu_impl {
+       u32 (*read_reg)(struct arm_smmu_device *smmu, int page, int offset);
+       void (*write_reg)(struct arm_smmu_device *smmu, int page, int offset,
+                         u32 val);
+       u64 (*read_reg64)(struct arm_smmu_device *smmu, int page, int offset);
+       void (*write_reg64)(struct arm_smmu_device *smmu, int page, int offset,
+                           u64 val);
+       int (*cfg_probe)(struct arm_smmu_device *smmu);
+       int (*reset)(struct arm_smmu_device *smmu);
+       int (*init_context)(struct arm_smmu_domain *smmu_domain);
+};
+
+static inline void __iomem *arm_smmu_page(struct arm_smmu_device *smmu, int n)
+{
+       return smmu->base + (n << smmu->pgshift);
+}
+
+static inline u32 arm_smmu_readl(struct arm_smmu_device *smmu, int page, int offset)
+{
+       if (smmu->impl && unlikely(smmu->impl->read_reg))
+               return smmu->impl->read_reg(smmu, page, offset);
+       return readl_relaxed(arm_smmu_page(smmu, page) + offset);
+}
+
+static inline void arm_smmu_writel(struct arm_smmu_device *smmu, int page,
+                                  int offset, u32 val)
+{
+       if (smmu->impl && unlikely(smmu->impl->write_reg))
+               smmu->impl->write_reg(smmu, page, offset, val);
+       else
+               writel_relaxed(val, arm_smmu_page(smmu, page) + offset);
+}
+
+static inline u64 arm_smmu_readq(struct arm_smmu_device *smmu, int page, int offset)
+{
+       if (smmu->impl && unlikely(smmu->impl->read_reg64))
+               return smmu->impl->read_reg64(smmu, page, offset);
+       return readq_relaxed(arm_smmu_page(smmu, page) + offset);
+}
+
+static inline void arm_smmu_writeq(struct arm_smmu_device *smmu, int page,
+                                  int offset, u64 val)
+{
+       if (smmu->impl && unlikely(smmu->impl->write_reg64))
+               smmu->impl->write_reg64(smmu, page, offset, val);
+       else
+               writeq_relaxed(val, arm_smmu_page(smmu, page) + offset);
+}
+
+#define ARM_SMMU_GR0           0
+#define ARM_SMMU_GR1           1
+#define ARM_SMMU_CB(s, n)      ((s)->numpage + (n))
+
+#define arm_smmu_gr0_read(s, o)                \
+       arm_smmu_readl((s), ARM_SMMU_GR0, (o))
+#define arm_smmu_gr0_write(s, o, v)    \
+       arm_smmu_writel((s), ARM_SMMU_GR0, (o), (v))
+
+#define arm_smmu_gr1_read(s, o)                \
+       arm_smmu_readl((s), ARM_SMMU_GR1, (o))
+#define arm_smmu_gr1_write(s, o, v)    \
+       arm_smmu_writel((s), ARM_SMMU_GR1, (o), (v))
+
+#define arm_smmu_cb_read(s, n, o)      \
+       arm_smmu_readl((s), ARM_SMMU_CB((s), (n)), (o))
+#define arm_smmu_cb_write(s, n, o, v)  \
+       arm_smmu_writel((s), ARM_SMMU_CB((s), (n)), (o), (v))
+#define arm_smmu_cb_readq(s, n, o)     \
+       arm_smmu_readq((s), ARM_SMMU_CB((s), (n)), (o))
+#define arm_smmu_cb_writeq(s, n, o, v) \
+       arm_smmu_writeq((s), ARM_SMMU_CB((s), (n)), (o), (v))
+
+struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu);
+
+#endif /* _ARM_SMMU_H */
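
For illustration only, assuming nothing beyond the hooks declared above: implementation quirks can now sit behind arm_smmu_impl_init() instead of being special-cased in the core driver (the MMU-500 ACR/ACTLR programming and the Cavium ASID/VMID offset removed from arm-smmu.c earlier in this diff are presumably the first users). A hypothetical reset hook might look like this; the names are invented and the register tweak is a placeholder.

        /* Illustrative only: a quirk hook called from arm_smmu_device_reset(). */
        static int example_impl_reset(struct arm_smmu_device *smmu)
        {
                /* A real erratum workaround would modify sACR (or similar) here. */
                u32 acr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sACR);

                arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sACR, acr);
                return 0;
        }

        static const struct arm_smmu_impl example_impl = {
                .reset = example_impl_reset,
        };
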
index f68a62c3c32b55e1414ee5eff0cc34a452f38489..8f412af842471aef7a56603156123a2d70fd8142 100644 (file)
@@ -303,13 +303,15 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
                u64 size, struct device *dev)
 {
        struct iommu_dma_cookie *cookie = domain->iova_cookie;
-       struct iova_domain *iovad = &cookie->iovad;
        unsigned long order, base_pfn;
+       struct iova_domain *iovad;
        int attr;
 
        if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
                return -EINVAL;
 
+       iovad = &cookie->iovad;
+
        /* Use the smallest supported page size for IOVA granularity */
        order = __ffs(domain->pgsize_bitmap);
        base_pfn = max_t(unsigned long, 1, base >> order);
@@ -444,13 +446,18 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
        struct iommu_dma_cookie *cookie = domain->iova_cookie;
        struct iova_domain *iovad = &cookie->iovad;
        size_t iova_off = iova_offset(iovad, dma_addr);
+       struct iommu_iotlb_gather iotlb_gather;
+       size_t unmapped;
 
        dma_addr -= iova_off;
        size = iova_align(iovad, size + iova_off);
+       iommu_iotlb_gather_init(&iotlb_gather);
+
+       unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather);
+       WARN_ON(unmapped != size);
 
-       WARN_ON(iommu_unmap_fast(domain, dma_addr, size) != size);
        if (!cookie->fq_domain)
-               iommu_tlb_sync(domain);
+               iommu_tlb_sync(domain, &iotlb_gather);
        iommu_dma_free_iova(cookie, dma_addr, size);
 }
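
The hunk above is the DMA layer's half of the new batched-unmap flow: invalidations accumulate in an on-stack iommu_iotlb_gather and a single iommu_tlb_sync() then covers the whole range (unless a flush-queue domain defers it). A minimal sketch of the same pattern from a caller's point of view, using only the calls visible above; the function name is invented.

        /* Illustrative only: gather-based unmap with one trailing TLB sync. */
        static size_t example_unmap_batched(struct iommu_domain *domain,
                                            unsigned long iova, size_t size)
        {
                struct iommu_iotlb_gather gather;
                size_t unmapped;

                iommu_iotlb_gather_init(&gather);
                unmapped = iommu_unmap_fast(domain, iova, size, &gather);
                iommu_tlb_sync(domain, &gather);

                return unmapped;
        }
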
 
index 5d0754ed5fa0c34203b8ffd7d22403a29bccd013..eecd6a4216672e430224fb961b53e98cf37ee423 100644 (file)
@@ -1519,6 +1519,64 @@ static const char *dma_remap_fault_reasons[] =
        "PCE for translation request specifies blocking",
 };
 
+static const char * const dma_remap_sm_fault_reasons[] = {
+       "SM: Invalid Root Table Address",
+       "SM: TTM 0 for request with PASID",
+       "SM: TTM 0 for page group request",
+       "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
+       "SM: Error attempting to access Root Entry",
+       "SM: Present bit in Root Entry is clear",
+       "SM: Non-zero reserved field set in Root Entry",
+       "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
+       "SM: Error attempting to access Context Entry",
+       "SM: Present bit in Context Entry is clear",
+       "SM: Non-zero reserved field set in the Context Entry",
+       "SM: Invalid Context Entry",
+       "SM: DTE field in Context Entry is clear",
+       "SM: PASID Enable field in Context Entry is clear",
+       "SM: PASID is larger than the max in Context Entry",
+       "SM: PRE field in Context-Entry is clear",
+       "SM: RID_PASID field error in Context-Entry",
+       "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
+       "SM: Error attempting to access the PASID Directory Entry",
+       "SM: Present bit in Directory Entry is clear",
+       "SM: Non-zero reserved field set in PASID Directory Entry",
+       "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
+       "SM: Error attempting to access PASID Table Entry",
+       "SM: Present bit in PASID Table Entry is clear",
+       "SM: Non-zero reserved field set in PASID Table Entry",
+       "SM: Invalid Scalable-Mode PASID Table Entry",
+       "SM: ERE field is clear in PASID Table Entry",
+       "SM: SRE field is clear in PASID Table Entry",
+       "Unknown", "Unknown",/* 0x5E-0x5F */
+       "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
+       "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
+       "SM: Error attempting to access first-level paging entry",
+       "SM: Present bit in first-level paging entry is clear",
+       "SM: Non-zero reserved field set in first-level paging entry",
+       "SM: Error attempting to access FL-PML4 entry",
+       "SM: First-level entry address beyond MGAW in Nested translation",
+       "SM: Read permission error in FL-PML4 entry in Nested translation",
+       "SM: Read permission error in first-level paging entry in Nested translation",
+       "SM: Write permission error in first-level paging entry in Nested translation",
+       "SM: Error attempting to access second-level paging entry",
+       "SM: Read/Write permission error in second-level paging entry",
+       "SM: Non-zero reserved field set in second-level paging entry",
+       "SM: Invalid second-level page table pointer",
+       "SM: A/D bit update needed in second-level entry when set up in no snoop",
+       "Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
+       "SM: Address in first-level translation is not canonical",
+       "SM: U/S set 0 for first-level translation with user privilege",
+       "SM: No execute permission for request with PASID and ER=1",
+       "SM: Address beyond the DMA hardware max",
+       "SM: Second-level entry address beyond the max",
+       "SM: No write permission for Write/AtomicOp request",
+       "SM: No read permission for Read/AtomicOp request",
+       "SM: Invalid address-interrupt address",
+       "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
+       "SM: A/D bit update needed in first-level entry when set up in no snoop",
+};
+
 static const char *irq_remap_fault_reasons[] =
 {
        "Detected reserved fields in the decoded interrupt-remapped request",
@@ -1536,6 +1594,10 @@ static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
                                        ARRAY_SIZE(irq_remap_fault_reasons))) {
                *fault_type = INTR_REMAP;
                return irq_remap_fault_reasons[fault_reason - 0x20];
+       } else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
+                       ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
+               *fault_type = DMA_REMAP;
+               return dma_remap_sm_fault_reasons[fault_reason - 0x30];
        } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
                *fault_type = DMA_REMAP;
                return dma_remap_fault_reasons[fault_reason];
@@ -1611,7 +1673,8 @@ void dmar_msi_read(int irq, struct msi_msg *msg)
 }
 
 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
-               u8 fault_reason, u16 source_id, unsigned long long addr)
+               u8 fault_reason, int pasid, u16 source_id,
+               unsigned long long addr)
 {
        const char *reason;
        int fault_type;
@@ -1624,10 +1687,11 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
                        PCI_FUNC(source_id & 0xFF), addr >> 48,
                        fault_reason, reason);
        else
-               pr_err("[%s] Request device [%02x:%02x.%d] fault addr %llx [fault reason %02d] %s\n",
+               pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
                       type ? "DMA Read" : "DMA Write",
                       source_id >> 8, PCI_SLOT(source_id & 0xFF),
-                      PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
+                      PCI_FUNC(source_id & 0xFF), pasid, addr,
+                      fault_reason, reason);
        return 0;
 }
 
@@ -1659,8 +1723,9 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
                u8 fault_reason;
                u16 source_id;
                u64 guest_addr;
-               int type;
+               int type, pasid;
                u32 data;
+               bool pasid_present;
 
                /* highest 32 bits */
                data = readl(iommu->reg + reg +
@@ -1672,10 +1737,12 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
                        fault_reason = dma_frcd_fault_reason(data);
                        type = dma_frcd_type(data);
 
+                       pasid = dma_frcd_pasid_value(data);
                        data = readl(iommu->reg + reg +
                                     fault_index * PRIMARY_FAULT_REG_LEN + 8);
                        source_id = dma_frcd_source_id(data);
 
+                       pasid_present = dma_frcd_pasid_present(data);
                        guest_addr = dmar_readq(iommu->reg + reg +
                                        fault_index * PRIMARY_FAULT_REG_LEN);
                        guest_addr = dma_frcd_page_addr(guest_addr);
@@ -1688,7 +1755,9 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
                raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
 
                if (!ratelimited)
+                       /* Use PASID -1 if no PASID is present */
                        dmar_fault_do_one(iommu, type, fault_reason,
+                                         pasid_present ? pasid : -1,
                                          source_id, guest_addr);
 
                fault_index++;
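
The dmar.c hunks above add a third fault-reason table for scalable mode and dispatch on numeric range: codes from 0x20 index the interrupt-remapping table, codes from 0x30 the new scalable-mode table, and small codes the legacy DMA-remapping table, with everything else reported as unknown. The standalone sketch below models only that range dispatch; the tables are abridged to a single entry each (the legacy entry is a placeholder string, the other two are taken verbatim from the patch).

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const char * const dma_reasons[] = {
        "legacy DMA-remapping reason 0x00 (placeholder)",
};
static const char * const irq_reasons[] = {
        "Detected reserved fields in the decoded interrupt-remapped request",
};
static const char * const sm_reasons[] = {
        "SM: Invalid Root Table Address",
};

static const char *fault_reason_str(unsigned int reason)
{
        if (reason >= 0x20 && reason - 0x20 < ARRAY_SIZE(irq_reasons))
                return irq_reasons[reason - 0x20];      /* interrupt remap */
        if (reason >= 0x30 && reason - 0x30 < ARRAY_SIZE(sm_reasons))
                return sm_reasons[reason - 0x30];       /* scalable mode */
        if (reason < ARRAY_SIZE(dma_reasons))
                return dma_reasons[reason];             /* legacy DMA remap */
        return "Unknown";
}

int main(void)
{
        printf("0x30: %s\n", fault_reason_str(0x30));
        printf("0x20: %s\n", fault_reason_str(0x20));
        printf("0x00: %s\n", fault_reason_str(0x00));
        return 0;
}
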
index b0c1e5f9daae5acc353ef91c5d0ab44f773fa318..9c94e16fb1277f793bb8d24062b09afb8ab49d74 100644 (file)
@@ -566,7 +566,7 @@ static void sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data,
 
 static const struct iommu_ops exynos_iommu_ops;
 
-static int __init exynos_sysmmu_probe(struct platform_device *pdev)
+static int exynos_sysmmu_probe(struct platform_device *pdev)
 {
        int irq, ret;
        struct device *dev = &pdev->dev;
@@ -583,10 +583,8 @@ static int __init exynos_sysmmu_probe(struct platform_device *pdev)
                return PTR_ERR(data->sfrbase);
 
        irq = platform_get_irq(pdev, 0);
-       if (irq <= 0) {
-               dev_err(dev, "Unable to find IRQ resource\n");
+       if (irq <= 0)
                return irq;
-       }
 
        ret = devm_request_irq(dev, irq, exynos_sysmmu_irq, 0,
                                dev_name(dev), data);
@@ -1130,7 +1128,8 @@ static void exynos_iommu_tlb_invalidate_entry(struct exynos_iommu_domain *domain
 }
 
 static size_t exynos_iommu_unmap(struct iommu_domain *iommu_domain,
-                                unsigned long l_iova, size_t size)
+                                unsigned long l_iova, size_t size,
+                                struct iommu_iotlb_gather *gather)
 {
        struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
        sysmmu_iova_t iova = (sysmmu_iova_t)l_iova;
index c4e0e4a9ee9ec5c2f5632176591badb3161031fa..87de0b975672b0a8864277ff799b5e02e56547ea 100644 (file)
 #include <linux/dma-direct.h>
 #include <linux/crash_dump.h>
 #include <linux/numa.h>
+#include <linux/swiotlb.h>
 #include <asm/irq_remapping.h>
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
+#include <trace/events/intel_iommu.h>
 
 #include "irq_remapping.h"
 #include "intel-pasid.h"
@@ -346,6 +348,8 @@ static int domain_detach_iommu(struct dmar_domain *domain,
 static bool device_is_rmrr_locked(struct device *dev);
 static int intel_iommu_attach_device(struct iommu_domain *domain,
                                     struct device *dev);
+static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
+                                           dma_addr_t iova);
 
 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
 int dmar_disabled = 0;
@@ -362,6 +366,7 @@ static int dmar_forcedac;
 static int intel_iommu_strict;
 static int intel_iommu_superpage = 1;
 static int iommu_identity_mapping;
+static int intel_no_bounce;
 
 #define IDENTMAP_ALL           1
 #define IDENTMAP_GFX           2
@@ -375,6 +380,9 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
 static DEFINE_SPINLOCK(device_domain_lock);
 static LIST_HEAD(device_domain_list);
 
+#define device_needs_bounce(d) (!intel_no_bounce && dev_is_pci(d) &&   \
+                               to_pci_dev(d)->untrusted)
+
 /*
  * Iterate over elements in device_domain_list and call the specified
  * callback @fn against each element.
@@ -457,6 +465,9 @@ static int __init intel_iommu_setup(char *str)
                        printk(KERN_INFO
                                "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
                        intel_iommu_tboot_noforce = 1;
+               } else if (!strncmp(str, "nobounce", 8)) {
+                       pr_info("Intel-IOMMU: No bounce buffer. This could expose security risks of DMA attacks\n");
+                       intel_no_bounce = 1;
                }
 
                str += strcspn(str, ",");
@@ -3296,7 +3307,7 @@ static int __init init_dmars(void)
                iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
        }
 
-       if (iommu_pass_through)
+       if (iommu_default_passthrough())
                iommu_identity_mapping |= IDENTMAP_ALL;
 
 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
@@ -3534,6 +3545,9 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
 
        start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
        start_paddr += paddr & ~PAGE_MASK;
+
+       trace_map_single(dev, start_paddr, paddr, size << VTD_PAGE_SHIFT);
+
        return start_paddr;
 
 error:
@@ -3589,10 +3603,7 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
        if (dev_is_pci(dev))
                pdev = to_pci_dev(dev);
 
-       dev_dbg(dev, "Device unmapping: pfn %lx-%lx\n", start_pfn, last_pfn);
-
        freelist = domain_unmap(domain, start_pfn, last_pfn);
-
        if (intel_iommu_strict || (pdev && pdev->untrusted) ||
                        !has_iova_flush_queue(&domain->iovad)) {
                iommu_flush_iotlb_psi(iommu, domain, start_pfn,
@@ -3608,6 +3619,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
                 * cpu used up by the iotlb flush operation...
                 */
        }
+
+       trace_unmap_single(dev, dev_addr, size);
 }
 
 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
@@ -3698,6 +3711,8 @@ static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
        }
 
        intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
+
+       trace_unmap_sg(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
 }
 
 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
@@ -3754,6 +3769,9 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
                return 0;
        }
 
+       trace_map_sg(dev, iova_pfn << PAGE_SHIFT,
+                    sg_phys(sglist), size << VTD_PAGE_SHIFT);
+
        return nelems;
 }
 
@@ -3769,6 +3787,252 @@ static const struct dma_map_ops intel_dma_ops = {
        .dma_supported = dma_direct_supported,
 };
 
+static void
+bounce_sync_single(struct device *dev, dma_addr_t addr, size_t size,
+                  enum dma_data_direction dir, enum dma_sync_target target)
+{
+       struct dmar_domain *domain;
+       phys_addr_t tlb_addr;
+
+       domain = find_domain(dev);
+       if (WARN_ON(!domain))
+               return;
+
+       tlb_addr = intel_iommu_iova_to_phys(&domain->domain, addr);
+       if (is_swiotlb_buffer(tlb_addr))
+               swiotlb_tbl_sync_single(dev, tlb_addr, size, dir, target);
+}
+
+static dma_addr_t
+bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size,
+                 enum dma_data_direction dir, unsigned long attrs,
+                 u64 dma_mask)
+{
+       size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
+       struct dmar_domain *domain;
+       struct intel_iommu *iommu;
+       unsigned long iova_pfn;
+       unsigned long nrpages;
+       phys_addr_t tlb_addr;
+       int prot = 0;
+       int ret;
+
+       domain = find_domain(dev);
+       if (WARN_ON(dir == DMA_NONE || !domain))
+               return DMA_MAPPING_ERROR;
+
+       iommu = domain_get_iommu(domain);
+       if (WARN_ON(!iommu))
+               return DMA_MAPPING_ERROR;
+
+       nrpages = aligned_nrpages(0, size);
+       iova_pfn = intel_alloc_iova(dev, domain,
+                                   dma_to_mm_pfn(nrpages), dma_mask);
+       if (!iova_pfn)
+               return DMA_MAPPING_ERROR;
+
+       /*
+        * Check if DMAR supports zero-length reads on write-only
+        * mappings.
+        */
+       if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
+                       !cap_zlr(iommu->cap))
+               prot |= DMA_PTE_READ;
+       if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
+               prot |= DMA_PTE_WRITE;
+
+       /*
+        * If both the physical buffer start address and size are
+        * page aligned, we don't need to use a bounce page.
+        */
+       if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) {
+               tlb_addr = swiotlb_tbl_map_single(dev,
+                               __phys_to_dma(dev, io_tlb_start),
+                               paddr, size, aligned_size, dir, attrs);
+               if (tlb_addr == DMA_MAPPING_ERROR) {
+                       goto swiotlb_error;
+               } else {
+                       /* Clean up the padding area. */
+                       void *padding_start = phys_to_virt(tlb_addr);
+                       size_t padding_size = aligned_size;
+
+                       if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+                           (dir == DMA_TO_DEVICE ||
+                            dir == DMA_BIDIRECTIONAL)) {
+                               padding_start += size;
+                               padding_size -= size;
+                       }
+
+                       memset(padding_start, 0, padding_size);
+               }
+       } else {
+               tlb_addr = paddr;
+       }
+
+       ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova_pfn),
+                                tlb_addr >> VTD_PAGE_SHIFT, nrpages, prot);
+       if (ret)
+               goto mapping_error;
+
+       trace_bounce_map_single(dev, iova_pfn << PAGE_SHIFT, paddr, size);
+
+       return (phys_addr_t)iova_pfn << PAGE_SHIFT;
+
+mapping_error:
+       if (is_swiotlb_buffer(tlb_addr))
+               swiotlb_tbl_unmap_single(dev, tlb_addr, size,
+                                        aligned_size, dir, attrs);
+swiotlb_error:
+       free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
+       dev_err(dev, "Device bounce map: %zx@%llx dir %d --- failed\n",
+               size, (unsigned long long)paddr, dir);
+
+       return DMA_MAPPING_ERROR;
+}
+
+static void
+bounce_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
+                   enum dma_data_direction dir, unsigned long attrs)
+{
+       size_t aligned_size = ALIGN(size, VTD_PAGE_SIZE);
+       struct dmar_domain *domain;
+       phys_addr_t tlb_addr;
+
+       domain = find_domain(dev);
+       if (WARN_ON(!domain))
+               return;
+
+       tlb_addr = intel_iommu_iova_to_phys(&domain->domain, dev_addr);
+       if (WARN_ON(!tlb_addr))
+               return;
+
+       intel_unmap(dev, dev_addr, size);
+       if (is_swiotlb_buffer(tlb_addr))
+               swiotlb_tbl_unmap_single(dev, tlb_addr, size,
+                                        aligned_size, dir, attrs);
+
+       trace_bounce_unmap_single(dev, dev_addr, size);
+}
+
+static dma_addr_t
+bounce_map_page(struct device *dev, struct page *page, unsigned long offset,
+               size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+       return bounce_map_single(dev, page_to_phys(page) + offset,
+                                size, dir, attrs, *dev->dma_mask);
+}
+
+static dma_addr_t
+bounce_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size,
+                   enum dma_data_direction dir, unsigned long attrs)
+{
+       return bounce_map_single(dev, phys_addr, size,
+                                dir, attrs, *dev->dma_mask);
+}
+
+static void
+bounce_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size,
+                 enum dma_data_direction dir, unsigned long attrs)
+{
+       bounce_unmap_single(dev, dev_addr, size, dir, attrs);
+}
+
+static void
+bounce_unmap_resource(struct device *dev, dma_addr_t dev_addr, size_t size,
+                     enum dma_data_direction dir, unsigned long attrs)
+{
+       bounce_unmap_single(dev, dev_addr, size, dir, attrs);
+}
+
+static void
+bounce_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems,
+               enum dma_data_direction dir, unsigned long attrs)
+{
+       struct scatterlist *sg;
+       int i;
+
+       for_each_sg(sglist, sg, nelems, i)
+               bounce_unmap_page(dev, sg->dma_address,
+                                 sg_dma_len(sg), dir, attrs);
+}
+
+static int
+bounce_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
+             enum dma_data_direction dir, unsigned long attrs)
+{
+       int i;
+       struct scatterlist *sg;
+
+       for_each_sg(sglist, sg, nelems, i) {
+               sg->dma_address = bounce_map_page(dev, sg_page(sg),
+                                                 sg->offset, sg->length,
+                                                 dir, attrs);
+               if (sg->dma_address == DMA_MAPPING_ERROR)
+                       goto out_unmap;
+               sg_dma_len(sg) = sg->length;
+       }
+
+       return nelems;
+
+out_unmap:
+       bounce_unmap_sg(dev, sglist, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
+       return 0;
+}
+
+static void
+bounce_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
+                          size_t size, enum dma_data_direction dir)
+{
+       bounce_sync_single(dev, addr, size, dir, SYNC_FOR_CPU);
+}
+
+static void
+bounce_sync_single_for_device(struct device *dev, dma_addr_t addr,
+                             size_t size, enum dma_data_direction dir)
+{
+       bounce_sync_single(dev, addr, size, dir, SYNC_FOR_DEVICE);
+}
+
+static void
+bounce_sync_sg_for_cpu(struct device *dev, struct scatterlist *sglist,
+                      int nelems, enum dma_data_direction dir)
+{
+       struct scatterlist *sg;
+       int i;
+
+       for_each_sg(sglist, sg, nelems, i)
+               bounce_sync_single(dev, sg_dma_address(sg),
+                                  sg_dma_len(sg), dir, SYNC_FOR_CPU);
+}
+
+static void
+bounce_sync_sg_for_device(struct device *dev, struct scatterlist *sglist,
+                         int nelems, enum dma_data_direction dir)
+{
+       struct scatterlist *sg;
+       int i;
+
+       for_each_sg(sglist, sg, nelems, i)
+               bounce_sync_single(dev, sg_dma_address(sg),
+                                  sg_dma_len(sg), dir, SYNC_FOR_DEVICE);
+}
+
+static const struct dma_map_ops bounce_dma_ops = {
+       .alloc                  = intel_alloc_coherent,
+       .free                   = intel_free_coherent,
+       .map_sg                 = bounce_map_sg,
+       .unmap_sg               = bounce_unmap_sg,
+       .map_page               = bounce_map_page,
+       .unmap_page             = bounce_unmap_page,
+       .sync_single_for_cpu    = bounce_sync_single_for_cpu,
+       .sync_single_for_device = bounce_sync_single_for_device,
+       .sync_sg_for_cpu        = bounce_sync_sg_for_cpu,
+       .sync_sg_for_device     = bounce_sync_sg_for_device,
+       .map_resource           = bounce_map_resource,
+       .unmap_resource         = bounce_unmap_resource,
+       .dma_supported          = dma_direct_supported,
+};
+
 static inline int iommu_domain_cache_init(void)
 {
        int ret = 0;
@@ -4569,22 +4833,20 @@ const struct attribute_group *intel_iommu_groups[] = {
        NULL,
 };
 
-static int __init platform_optin_force_iommu(void)
+static inline bool has_untrusted_dev(void)
 {
        struct pci_dev *pdev = NULL;
-       bool has_untrusted_dev = false;
 
-       if (!dmar_platform_optin() || no_platform_optin)
-               return 0;
+       for_each_pci_dev(pdev)
+               if (pdev->untrusted)
+                       return true;
 
-       for_each_pci_dev(pdev) {
-               if (pdev->untrusted) {
-                       has_untrusted_dev = true;
-                       break;
-               }
-       }
+       return false;
+}
 
-       if (!has_untrusted_dev)
+static int __init platform_optin_force_iommu(void)
+{
+       if (!dmar_platform_optin() || no_platform_optin || !has_untrusted_dev())
                return 0;
 
        if (no_iommu || dmar_disabled)
@@ -4598,9 +4860,6 @@ static int __init platform_optin_force_iommu(void)
                iommu_identity_mapping |= IDENTMAP_ALL;
 
        dmar_disabled = 0;
-#if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
-       swiotlb = 0;
-#endif
        no_iommu = 0;
 
        return 1;
@@ -4740,7 +4999,14 @@ int __init intel_iommu_init(void)
        up_write(&dmar_global_lock);
 
 #if defined(CONFIG_X86) && defined(CONFIG_SWIOTLB)
-       swiotlb = 0;
+       /*
+        * If the system has no untrusted device, or the user has chosen
+        * to disable the bounce page mechanism, swiotlb is not needed.
+        * Mark this so that the pre-allocated bounce pages are released
+        * later.
+        */
+       if (!has_untrusted_dev() || intel_no_bounce)
+               swiotlb = 0;
 #endif
        dma_ops = &intel_dma_ops;
 
@@ -5204,7 +5470,8 @@ static int intel_iommu_map(struct iommu_domain *domain,
 }
 
 static size_t intel_iommu_unmap(struct iommu_domain *domain,
-                               unsigned long iova, size_t size)
+                               unsigned long iova, size_t size,
+                               struct iommu_iotlb_gather *gather)
 {
        struct dmar_domain *dmar_domain = to_dmar_domain(domain);
        struct page *freelist = NULL;
@@ -5360,6 +5627,11 @@ static int intel_iommu_add_device(struct device *dev)
                }
        }
 
+       if (device_needs_bounce(dev)) {
+               dev_info(dev, "Use Intel IOMMU bounce page dma_ops\n");
+               set_dma_ops(dev, &bounce_dma_ops);
+       }
+
        return 0;
 }
 
@@ -5377,6 +5649,9 @@ static void intel_iommu_remove_device(struct device *dev)
        iommu_group_remove_device(dev);
 
        iommu_device_unlink(&iommu->iommu, dev);
+
+       if (device_needs_bounce(dev))
+               set_dma_ops(dev, NULL);
 }
 
 static void intel_iommu_get_resv_regions(struct device *device,
@@ -5690,20 +5965,46 @@ const struct iommu_ops intel_iommu_ops = {
        .pgsize_bitmap          = INTEL_IOMMU_PGSIZES,
 };
 
-static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
+static void quirk_iommu_igfx(struct pci_dev *dev)
 {
-       /* G4x/GM45 integrated gfx dmar support is totally busted. */
        pci_info(dev, "Disabling IOMMU for graphics on this chipset\n");
        dmar_map_gfx = 0;
 }
 
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
+/* G4x/GM45 integrated gfx dmar support is totally busted. */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
+
+/* Broadwell igfx malfunctions with dmar */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160E, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1602, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160A, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160D, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1616, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161B, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161E, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1612, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161A, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x161D, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1626, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162B, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162E, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1622, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162A, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x162D, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1636, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163B, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163E, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx);
 
 static void quirk_iommu_rwbf(struct pci_dev *dev)
 {
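
The bounce-buffer path added to intel-iommu.c above only routes a buffer through swiotlb when it does not both start and end on a VT-d page boundary, i.e. when it shares a page with unrelated data that an untrusted device must not be able to reach; page-aligned buffers are mapped in place. The fragment below is a standalone model of that single decision, mirroring the !IS_ALIGNED(paddr | size, VTD_PAGE_SIZE) test (needs_bounce() is an invented name).

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VTD_PAGE_SIZE 4096ULL

/* Bounce whenever the buffer does not start and end on a page boundary,
 * because it would then expose neighbouring data in the same page. */
static bool needs_bounce(uint64_t paddr, uint64_t size)
{
        return (paddr | size) & (VTD_PAGE_SIZE - 1);
}

int main(void)
{
        printf("%d\n", needs_bounce(0x1000, 4096));     /* 0: map in place */
        printf("%d\n", needs_bounce(0x1080, 512));      /* 1: bounce via swiotlb */
        return 0;
}
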
diff --git a/drivers/iommu/intel-trace.c b/drivers/iommu/intel-trace.c
new file mode 100644 (file)
index 0000000..bfb6a6e
--- /dev/null
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel IOMMU trace support
+ *
+ * Copyright (C) 2019 Intel Corporation
+ *
+ * Author: Lu Baolu <baolu.lu@linux.intel.com>
+ */
+
+#include <linux/string.h>
+#include <linux/types.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/intel_iommu.h>
index 4786ca061e31fd6e0ad6cb3328f66e89306e064a..81e43c1df7ecb5fdd769e2dc1bc4c265f37fec1f 100644 (file)
@@ -376,13 +376,13 @@ static int set_msi_sid_cb(struct pci_dev *pdev, u16 alias, void *opaque)
 {
        struct set_msi_sid_data *data = opaque;
 
+       if (data->count == 0 || PCI_BUS_NUM(alias) == PCI_BUS_NUM(data->alias))
+               data->busmatch_count++;
+
        data->pdev = pdev;
        data->alias = alias;
        data->count++;
 
-       if (PCI_BUS_NUM(alias) == pdev->bus->number)
-               data->busmatch_count++;
-
        return 0;
 }
 
index 0fc8dfab2abf57d51418ba17f5d470eb6292b49a..4cb394937700ce4e0f5ff7133042f6e9f5f6a38c 100644 (file)
 #define ARM_V7S_TEX_MASK               0x7
 #define ARM_V7S_ATTR_TEX(val)          (((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT)
 
-#define ARM_V7S_ATTR_MTK_4GB           BIT(9) /* MTK extend it for 4GB mode */
+/* MediaTek extends two PTE bits to carry PA bit 32 and bit 33 */
+#define ARM_V7S_ATTR_MTK_PA_BIT32      BIT(9)
+#define ARM_V7S_ATTR_MTK_PA_BIT33      BIT(4)
 
 /* *well, except for TEX on level 2 large pages, of course :( */
 #define ARM_V7S_CONT_PAGE_TEX_SHIFT    6
@@ -169,18 +171,62 @@ struct arm_v7s_io_pgtable {
        spinlock_t              split_lock;
 };
 
+static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl);
+
 static dma_addr_t __arm_v7s_dma_addr(void *pages)
 {
        return (dma_addr_t)virt_to_phys(pages);
 }
 
-static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl)
+static bool arm_v7s_is_mtk_enabled(struct io_pgtable_cfg *cfg)
+{
+       return IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT) &&
+               (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT);
+}
+
+static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl,
+                                   struct io_pgtable_cfg *cfg)
+{
+       arm_v7s_iopte pte = paddr & ARM_V7S_LVL_MASK(lvl);
+
+       if (!arm_v7s_is_mtk_enabled(cfg))
+               return pte;
+
+       if (paddr & BIT_ULL(32))
+               pte |= ARM_V7S_ATTR_MTK_PA_BIT32;
+       if (paddr & BIT_ULL(33))
+               pte |= ARM_V7S_ATTR_MTK_PA_BIT33;
+       return pte;
+}
+
+static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
+                                 struct io_pgtable_cfg *cfg)
 {
+       arm_v7s_iopte mask;
+       phys_addr_t paddr;
+
        if (ARM_V7S_PTE_IS_TABLE(pte, lvl))
-               pte &= ARM_V7S_TABLE_MASK;
+               mask = ARM_V7S_TABLE_MASK;
+       else if (arm_v7s_pte_is_cont(pte, lvl))
+               mask = ARM_V7S_LVL_MASK(lvl) * ARM_V7S_CONT_PAGES;
        else
-               pte &= ARM_V7S_LVL_MASK(lvl);
-       return phys_to_virt(pte);
+               mask = ARM_V7S_LVL_MASK(lvl);
+
+       paddr = pte & mask;
+       if (!arm_v7s_is_mtk_enabled(cfg))
+               return paddr;
+
+       if (pte & ARM_V7S_ATTR_MTK_PA_BIT32)
+               paddr |= BIT_ULL(32);
+       if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
+               paddr |= BIT_ULL(33);
+       return paddr;
+}
+
+static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl,
+                                 struct arm_v7s_io_pgtable *data)
+{
+       return phys_to_virt(iopte_to_paddr(pte, lvl, &data->iop.cfg));
 }
 
 static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
@@ -295,9 +341,6 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl,
        if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
                pte |= ARM_V7S_ATTR_NS_SECTION;
 
-       if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB)
-               pte |= ARM_V7S_ATTR_MTK_4GB;
-
        return pte;
 }
 
@@ -362,7 +405,8 @@ static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl)
        return false;
 }
 
-static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *, unsigned long,
+static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *,
+                             struct iommu_iotlb_gather *, unsigned long,
                              size_t, int, arm_v7s_iopte *);
 
 static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
@@ -383,7 +427,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
                        size_t sz = ARM_V7S_BLOCK_SIZE(lvl);
 
                        tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl);
-                       if (WARN_ON(__arm_v7s_unmap(data, iova + i * sz,
+                       if (WARN_ON(__arm_v7s_unmap(data, NULL, iova + i * sz,
                                                    sz, lvl, tblp) != sz))
                                return -EINVAL;
                } else if (ptep[i]) {
@@ -396,7 +440,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
        if (num_entries > 1)
                pte = arm_v7s_pte_to_cont(pte, lvl);
 
-       pte |= paddr & ARM_V7S_LVL_MASK(lvl);
+       pte |= paddr_to_iopte(paddr, lvl, cfg);
 
        __arm_v7s_set_pte(ptep, pte, num_entries, cfg);
        return 0;
@@ -462,7 +506,7 @@ static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
        }
 
        if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) {
-               cptep = iopte_deref(pte, lvl);
+               cptep = iopte_deref(pte, lvl, data);
        } else if (pte) {
                /* We require an unmap first */
                WARN_ON(!selftest_running);
@@ -484,7 +528,8 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
        if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
                return 0;
 
-       if (WARN_ON(upper_32_bits(iova) || upper_32_bits(paddr)))
+       if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
+                   paddr >= (1ULL << data->iop.cfg.oas)))
                return -ERANGE;
 
        ret = __arm_v7s_map(data, iova, paddr, size, prot, 1, data->pgd);
@@ -493,9 +538,8 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
         * a chance for anything to kick off a table walk for the new iova.
         */
        if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) {
-               io_pgtable_tlb_add_flush(iop, iova, size,
-                                        ARM_V7S_BLOCK_SIZE(2), false);
-               io_pgtable_tlb_sync(iop);
+               io_pgtable_tlb_flush_walk(iop, iova, size,
+                                         ARM_V7S_BLOCK_SIZE(2));
        } else {
                wmb();
        }
@@ -512,7 +556,8 @@ static void arm_v7s_free_pgtable(struct io_pgtable *iop)
                arm_v7s_iopte pte = data->pgd[i];
 
                if (ARM_V7S_PTE_IS_TABLE(pte, 1))
-                       __arm_v7s_free_table(iopte_deref(pte, 1), 2, data);
+                       __arm_v7s_free_table(iopte_deref(pte, 1, data),
+                                            2, data);
        }
        __arm_v7s_free_table(data->pgd, 1, data);
        kmem_cache_destroy(data->l2_tables);
@@ -541,12 +586,12 @@ static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
        __arm_v7s_pte_sync(ptep, ARM_V7S_CONT_PAGES, &iop->cfg);
 
        size *= ARM_V7S_CONT_PAGES;
-       io_pgtable_tlb_add_flush(iop, iova, size, size, true);
-       io_pgtable_tlb_sync(iop);
+       io_pgtable_tlb_flush_leaf(iop, iova, size, size);
        return pte;
 }
 
 static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
+                                     struct iommu_iotlb_gather *gather,
                                      unsigned long iova, size_t size,
                                      arm_v7s_iopte blk_pte,
                                      arm_v7s_iopte *ptep)
@@ -582,16 +627,16 @@ static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
                if (!ARM_V7S_PTE_IS_TABLE(pte, 1))
                        return 0;
 
-               tablep = iopte_deref(pte, 1);
-               return __arm_v7s_unmap(data, iova, size, 2, tablep);
+               tablep = iopte_deref(pte, 1, data);
+               return __arm_v7s_unmap(data, gather, iova, size, 2, tablep);
        }
 
-       io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
-       io_pgtable_tlb_sync(&data->iop);
+       io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
        return size;
 }
 
 static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
+                             struct iommu_iotlb_gather *gather,
                              unsigned long iova, size_t size, int lvl,
                              arm_v7s_iopte *ptep)
 {
@@ -638,10 +683,9 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
                for (i = 0; i < num_entries; i++) {
                        if (ARM_V7S_PTE_IS_TABLE(pte[i], lvl)) {
                                /* Also flush any partial walks */
-                               io_pgtable_tlb_add_flush(iop, iova, blk_size,
-                                       ARM_V7S_BLOCK_SIZE(lvl + 1), false);
-                               io_pgtable_tlb_sync(iop);
-                               ptep = iopte_deref(pte[i], lvl);
+                               io_pgtable_tlb_flush_walk(iop, iova, blk_size,
+                                               ARM_V7S_BLOCK_SIZE(lvl + 1));
+                               ptep = iopte_deref(pte[i], lvl, data);
                                __arm_v7s_free_table(ptep, lvl + 1, data);
                        } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
                                /*
@@ -651,8 +695,7 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
                                 */
                                smp_wmb();
                        } else {
-                               io_pgtable_tlb_add_flush(iop, iova, blk_size,
-                                                        blk_size, true);
+                               io_pgtable_tlb_add_page(iop, gather, iova, blk_size);
                        }
                        iova += blk_size;
                }
@@ -662,23 +705,24 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
                 * Insert a table at the next level to map the old region,
                 * minus the part we want to unmap
                 */
-               return arm_v7s_split_blk_unmap(data, iova, size, pte[0], ptep);
+               return arm_v7s_split_blk_unmap(data, gather, iova, size, pte[0],
+                                              ptep);
        }
 
        /* Keep on walkin' */
-       ptep = iopte_deref(pte[0], lvl);
-       return __arm_v7s_unmap(data, iova, size, lvl + 1, ptep);
+       ptep = iopte_deref(pte[0], lvl, data);
+       return __arm_v7s_unmap(data, gather, iova, size, lvl + 1, ptep);
 }
 
 static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
-                           size_t size)
+                           size_t size, struct iommu_iotlb_gather *gather)
 {
        struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
 
        if (WARN_ON(upper_32_bits(iova)))
                return 0;
 
-       return __arm_v7s_unmap(data, iova, size, 1, data->pgd);
+       return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd);
 }
 
 static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
@@ -692,7 +736,7 @@ static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
        do {
                ptep += ARM_V7S_LVL_IDX(iova, ++lvl);
                pte = READ_ONCE(*ptep);
-               ptep = iopte_deref(pte, lvl);
+               ptep = iopte_deref(pte, lvl, data);
        } while (ARM_V7S_PTE_IS_TABLE(pte, lvl));
 
        if (!ARM_V7S_PTE_IS_VALID(pte))
@@ -701,7 +745,7 @@ static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
        mask = ARM_V7S_LVL_MASK(lvl);
        if (arm_v7s_pte_is_cont(pte, lvl))
                mask *= ARM_V7S_CONT_PAGES;
-       return (pte & mask) | (iova & ~mask);
+       return iopte_to_paddr(pte, lvl, &data->iop.cfg) | (iova & ~mask);
 }
 
 static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
@@ -709,18 +753,21 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
 {
        struct arm_v7s_io_pgtable *data;
 
-       if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS)
+       if (cfg->ias > ARM_V7S_ADDR_BITS)
+               return NULL;
+
+       if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS))
                return NULL;
 
        if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
                            IO_PGTABLE_QUIRK_NO_PERMS |
                            IO_PGTABLE_QUIRK_TLBI_ON_MAP |
-                           IO_PGTABLE_QUIRK_ARM_MTK_4GB |
+                           IO_PGTABLE_QUIRK_ARM_MTK_EXT |
                            IO_PGTABLE_QUIRK_NON_STRICT))
                return NULL;
 
        /* If ARM_MTK_EXT is enabled, NO_PERMS is also expected. */
-       if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB &&
+       if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT &&
            !(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS))
                        return NULL;
 
@@ -806,22 +853,24 @@ static void dummy_tlb_flush_all(void *cookie)
        WARN_ON(cookie != cfg_cookie);
 }
 
-static void dummy_tlb_add_flush(unsigned long iova, size_t size,
-                               size_t granule, bool leaf, void *cookie)
+static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
+                           void *cookie)
 {
        WARN_ON(cookie != cfg_cookie);
        WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
 }
 
-static void dummy_tlb_sync(void *cookie)
+static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
+                              unsigned long iova, size_t granule, void *cookie)
 {
-       WARN_ON(cookie != cfg_cookie);
+       dummy_tlb_flush(iova, granule, granule, cookie);
 }
 
-static const struct iommu_gather_ops dummy_tlb_ops = {
+static const struct iommu_flush_ops dummy_tlb_ops = {
        .tlb_flush_all  = dummy_tlb_flush_all,
-       .tlb_add_flush  = dummy_tlb_add_flush,
-       .tlb_sync       = dummy_tlb_sync,
+       .tlb_flush_walk = dummy_tlb_flush,
+       .tlb_flush_leaf = dummy_tlb_flush,
+       .tlb_add_page   = dummy_tlb_add_page,
 };
 
 #define __FAIL(ops)    ({                              \
@@ -896,7 +945,7 @@ static int __init arm_v7s_do_selftests(void)
        size = 1UL << __ffs(cfg.pgsize_bitmap);
        while (i < loopnr) {
                iova_start = i * SZ_16M;
-               if (ops->unmap(ops, iova_start + size, size) != size)
+               if (ops->unmap(ops, iova_start + size, size, NULL) != size)
                        return __FAIL(ops);
 
                /* Remap of partial unmap */
@@ -914,7 +963,7 @@ static int __init arm_v7s_do_selftests(void)
        for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) {
                size = 1UL << i;
 
-               if (ops->unmap(ops, iova, size) != size)
+               if (ops->unmap(ops, iova, size, NULL) != size)
                        return __FAIL(ops);
 
                if (ops->iova_to_phys(ops, iova + 42))
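
The io-pgtable-arm-v7s.c changes above replace the single 4GB-mode attribute with two MediaTek-specific PTE bits, so a 34-bit physical address can round-trip through a 32-bit short-descriptor entry: PA bit 32 travels in PTE bit 9 and PA bit 33 in PTE bit 4, with the rest of the entry masked as usual. The standalone sketch below demonstrates the packing for a 4 KiB page only; PA_LOW_MASK is a simplification of the level-dependent ARM_V7S_LVL_MASK(), and the function names are illustrative.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MTK_PA_BIT32    (1u << 9)       /* carries PA bit 32 */
#define MTK_PA_BIT33    (1u << 4)       /* carries PA bit 33 */
#define PA_LOW_MASK     0xfffff000ULL   /* 4 KiB page, bits 31..12 */

static uint32_t paddr_to_pte(uint64_t paddr)
{
        uint32_t pte = paddr & PA_LOW_MASK;

        if (paddr & (1ULL << 32))
                pte |= MTK_PA_BIT32;
        if (paddr & (1ULL << 33))
                pte |= MTK_PA_BIT33;
        return pte;
}

static uint64_t pte_to_paddr(uint32_t pte)
{
        uint64_t paddr = pte & PA_LOW_MASK;

        if (pte & MTK_PA_BIT32)
                paddr |= 1ULL << 32;
        if (pte & MTK_PA_BIT33)
                paddr |= 1ULL << 33;
        return paddr;
}

int main(void)
{
        uint64_t pa = 0x2abcde000ULL;   /* a page-aligned 34-bit address */

        assert(pte_to_paddr(paddr_to_pte(pa)) == pa);
        printf("0x%llx fits in a 32-bit PTE as 0x%x\n",
               (unsigned long long)pa, paddr_to_pte(pa));
        return 0;
}
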
index 161a7d56264d0c26b7297930c0d843df674dd7bc..4c91359057c53d906ee5496396e28595c2153905 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/atomic.h>
 #include <linux/bitops.h>
 #include <linux/io-pgtable.h>
-#include <linux/iommu.h>
 #include <linux/kernel.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
@@ -290,6 +289,7 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
 }
 
 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
+                              struct iommu_iotlb_gather *gather,
                               unsigned long iova, size_t size, int lvl,
                               arm_lpae_iopte *ptep);
 
@@ -335,8 +335,10 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
                size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
 
                tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
-               if (WARN_ON(__arm_lpae_unmap(data, iova, sz, lvl, tblp) != sz))
+               if (__arm_lpae_unmap(data, NULL, iova, sz, lvl, tblp) != sz) {
+                       WARN_ON(1);
                        return -EINVAL;
+               }
        }
 
        __arm_lpae_init_pte(data, paddr, prot, lvl, ptep);
@@ -537,6 +539,7 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop)
 }
 
 static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
+                                      struct iommu_iotlb_gather *gather,
                                       unsigned long iova, size_t size,
                                       arm_lpae_iopte blk_pte, int lvl,
                                       arm_lpae_iopte *ptep)
@@ -582,15 +585,15 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 
                tablep = iopte_deref(pte, data);
        } else if (unmap_idx >= 0) {
-               io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
-               io_pgtable_tlb_sync(&data->iop);
+               io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
                return size;
        }
 
-       return __arm_lpae_unmap(data, iova, size, lvl, tablep);
+       return __arm_lpae_unmap(data, gather, iova, size, lvl, tablep);
 }
 
 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
+                              struct iommu_iotlb_gather *gather,
                               unsigned long iova, size_t size, int lvl,
                               arm_lpae_iopte *ptep)
 {
@@ -612,9 +615,8 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 
                if (!iopte_leaf(pte, lvl, iop->fmt)) {
                        /* Also flush any partial walks */
-                       io_pgtable_tlb_add_flush(iop, iova, size,
-                                               ARM_LPAE_GRANULE(data), false);
-                       io_pgtable_tlb_sync(iop);
+                       io_pgtable_tlb_flush_walk(iop, iova, size,
+                                                 ARM_LPAE_GRANULE(data));
                        ptep = iopte_deref(pte, data);
                        __arm_lpae_free_pgtable(data, lvl + 1, ptep);
                } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
@@ -625,7 +627,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
                         */
                        smp_wmb();
                } else {
-                       io_pgtable_tlb_add_flush(iop, iova, size, size, true);
+                       io_pgtable_tlb_add_page(iop, gather, iova, size);
                }
 
                return size;
@@ -634,17 +636,17 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
                 * Insert a table at the next level to map the old region,
                 * minus the part we want to unmap
                 */
-               return arm_lpae_split_blk_unmap(data, iova, size, pte,
+               return arm_lpae_split_blk_unmap(data, gather, iova, size, pte,
                                                lvl + 1, ptep);
        }
 
        /* Keep on walkin' */
        ptep = iopte_deref(pte, data);
-       return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep);
+       return __arm_lpae_unmap(data, gather, iova, size, lvl + 1, ptep);
 }
 
 static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
-                            size_t size)
+                            size_t size, struct iommu_iotlb_gather *gather)
 {
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
        arm_lpae_iopte *ptep = data->pgd;
@@ -653,7 +655,7 @@ static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
        if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias)))
                return 0;
 
-       return __arm_lpae_unmap(data, iova, size, lvl, ptep);
+       return __arm_lpae_unmap(data, gather, iova, size, lvl, ptep);
 }
 
 static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
@@ -1070,22 +1072,24 @@ static void dummy_tlb_flush_all(void *cookie)
        WARN_ON(cookie != cfg_cookie);
 }
 
-static void dummy_tlb_add_flush(unsigned long iova, size_t size,
-                               size_t granule, bool leaf, void *cookie)
+static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
+                           void *cookie)
 {
        WARN_ON(cookie != cfg_cookie);
        WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
 }
 
-static void dummy_tlb_sync(void *cookie)
+static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
+                              unsigned long iova, size_t granule, void *cookie)
 {
-       WARN_ON(cookie != cfg_cookie);
+       dummy_tlb_flush(iova, granule, granule, cookie);
 }
 
-static const struct iommu_gather_ops dummy_tlb_ops __initconst = {
+static const struct iommu_flush_ops dummy_tlb_ops __initconst = {
        .tlb_flush_all  = dummy_tlb_flush_all,
-       .tlb_add_flush  = dummy_tlb_add_flush,
-       .tlb_sync       = dummy_tlb_sync,
+       .tlb_flush_walk = dummy_tlb_flush,
+       .tlb_flush_leaf = dummy_tlb_flush,
+       .tlb_add_page   = dummy_tlb_add_page,
 };
 
 static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops)
@@ -1168,7 +1172,7 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
 
                /* Partial unmap */
                size = 1UL << __ffs(cfg->pgsize_bitmap);
-               if (ops->unmap(ops, SZ_1G + size, size) != size)
+               if (ops->unmap(ops, SZ_1G + size, size, NULL) != size)
                        return __FAIL(ops, i);
 
                /* Remap of partial unmap */
@@ -1183,7 +1187,7 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
                for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
                        size = 1UL << j;
 
-                       if (ops->unmap(ops, iova, size) != size)
+                       if (ops->unmap(ops, iova, size, NULL) != size)
                                return __FAIL(ops, i);
 
                        if (ops->iova_to_phys(ops, iova + 42))
index 0c674d80c37fd5768fa62296f6be72f97c5dd245..d658c7c6a2ab0a6d3d6fcffdb89edf03f9b9bda2 100644 (file)
 
 static struct kset *iommu_group_kset;
 static DEFINE_IDA(iommu_group_ida);
-#ifdef CONFIG_IOMMU_DEFAULT_PASSTHROUGH
-static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY;
-#else
-static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA;
-#endif
+
+static unsigned int iommu_def_domain_type __read_mostly;
 static bool iommu_dma_strict __read_mostly = true;
+static u32 iommu_cmd_line __read_mostly;
 
 struct iommu_group {
        struct kobject kobj;
@@ -68,6 +66,18 @@ static const char * const iommu_group_resv_type_string[] = {
        [IOMMU_RESV_SW_MSI]                     = "msi",
 };
 
+#define IOMMU_CMD_LINE_DMA_API         BIT(0)
+
+static void iommu_set_cmd_line_dma_api(void)
+{
+       iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API;
+}
+
+static bool iommu_cmd_line_dma_api(void)
+{
+       return !!(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API);
+}
+
 #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)          \
 struct iommu_group_attribute iommu_group_attr_##_name =                \
        __ATTR(_name, _mode, _show, _store)
@@ -80,12 +90,55 @@ struct iommu_group_attribute iommu_group_attr_##_name =             \
 static LIST_HEAD(iommu_device_list);
 static DEFINE_SPINLOCK(iommu_device_lock);
 
+/*
+ * Use a function instead of an array here because the domain-type is a
+ * bit-field, so an array would waste memory.
+ */
+static const char *iommu_domain_type_str(unsigned int t)
+{
+       switch (t) {
+       case IOMMU_DOMAIN_BLOCKED:
+               return "Blocked";
+       case IOMMU_DOMAIN_IDENTITY:
+               return "Passthrough";
+       case IOMMU_DOMAIN_UNMANAGED:
+               return "Unmanaged";
+       case IOMMU_DOMAIN_DMA:
+               return "Translated";
+       default:
+               return "Unknown";
+       }
+}
+
+static int __init iommu_subsys_init(void)
+{
+       bool cmd_line = iommu_cmd_line_dma_api();
+
+       if (!cmd_line) {
+               if (IS_ENABLED(CONFIG_IOMMU_DEFAULT_PASSTHROUGH))
+                       iommu_set_default_passthrough(false);
+               else
+                       iommu_set_default_translated(false);
+
+               if (iommu_default_passthrough() && mem_encrypt_active()) {
+                       pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n");
+                       iommu_set_default_translated(false);
+               }
+       }
+
+       pr_info("Default domain type: %s %s\n",
+               iommu_domain_type_str(iommu_def_domain_type),
+               cmd_line ? "(set via kernel command line)" : "");
+
+       return 0;
+}
+subsys_initcall(iommu_subsys_init);
+
 int iommu_device_register(struct iommu_device *iommu)
 {
        spin_lock(&iommu_device_lock);
        list_add_tail(&iommu->list, &iommu_device_list);
        spin_unlock(&iommu_device_lock);
-
        return 0;
 }
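
iommu_subsys_init() above settles the default domain type with a fixed precedence: an explicit iommu.passthrough= on the kernel command line always wins; otherwise the Kconfig default applies, except that active memory encryption downgrades a passthrough default to translated DMA. The sketch below models only that precedence, not the real subsystem initialisation; pick_default() and its parameters are invented names.

#include <stdbool.h>
#include <stdio.h>

enum def_domain { DOMAIN_DMA, DOMAIN_IDENTITY };

static enum def_domain pick_default(bool cmd_line_set,
                                    enum def_domain cmd_line_val,
                                    bool kconfig_passthrough,
                                    bool mem_encrypt_active)
{
        if (cmd_line_set)
                return cmd_line_val;    /* user choice is never overridden */

        if (kconfig_passthrough && mem_encrypt_active)
                return DOMAIN_DMA;      /* encryption disables default passthrough */

        return kconfig_passthrough ? DOMAIN_IDENTITY : DOMAIN_DMA;
}

static const char *name(enum def_domain t)
{
        return t == DOMAIN_DMA ? "Translated" : "Passthrough";
}

int main(void)
{
        /* Kconfig wants passthrough, but active memory encryption wins. */
        printf("%s\n", name(pick_default(false, DOMAIN_DMA, true, true)));
        /* The command line overrides everything, including encryption. */
        printf("%s\n", name(pick_default(true, DOMAIN_IDENTITY, false, true)));
        return 0;
}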
 
@@ -165,7 +218,11 @@ static int __init iommu_set_def_domain_type(char *str)
        if (ret)
                return ret;
 
-       iommu_def_domain_type = pt ? IOMMU_DOMAIN_IDENTITY : IOMMU_DOMAIN_DMA;
+       if (pt)
+               iommu_set_default_passthrough(true);
+       else
+               iommu_set_default_translated(true);
+
        return 0;
 }
 early_param("iommu.passthrough", iommu_set_def_domain_type);
@@ -229,60 +286,58 @@ static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf)
  * @new: new region to insert
  * @regions: list of regions
  *
- * The new element is sorted by address with respect to the other
- * regions of the same type. In case it overlaps with another
- * region of the same type, regions are merged. In case it
- * overlaps with another region of different type, regions are
- * not merged.
+ * Elements are sorted by start address and overlapping segments
+ * of the same type are merged.
  */
-static int iommu_insert_resv_region(struct iommu_resv_region *new,
-                                   struct list_head *regions)
+int iommu_insert_resv_region(struct iommu_resv_region *new,
+                            struct list_head *regions)
 {
-       struct iommu_resv_region *region;
-       phys_addr_t start = new->start;
-       phys_addr_t end = new->start + new->length - 1;
-       struct list_head *pos = regions->next;
-
-       while (pos != regions) {
-               struct iommu_resv_region *entry =
-                       list_entry(pos, struct iommu_resv_region, list);
-               phys_addr_t a = entry->start;
-               phys_addr_t b = entry->start + entry->length - 1;
-               int type = entry->type;
-
-               if (end < a) {
-                       goto insert;
-               } else if (start > b) {
-                       pos = pos->next;
-               } else if ((start >= a) && (end <= b)) {
-                       if (new->type == type)
-                               return 0;
-                       else
-                               pos = pos->next;
+       struct iommu_resv_region *iter, *tmp, *nr, *top;
+       LIST_HEAD(stack);
+
+       nr = iommu_alloc_resv_region(new->start, new->length,
+                                    new->prot, new->type);
+       if (!nr)
+               return -ENOMEM;
+
+       /* First add the new element based on start address sorting */
+       list_for_each_entry(iter, regions, list) {
+               if (nr->start < iter->start ||
+                   (nr->start == iter->start && nr->type <= iter->type))
+                       break;
+       }
+       list_add_tail(&nr->list, &iter->list);
+
+       /* Merge overlapping segments of type nr->type in @regions, if any */
+       list_for_each_entry_safe(iter, tmp, regions, list) {
+               phys_addr_t top_end, iter_end = iter->start + iter->length - 1;
+
+               /* no merge needed on elements of different types than @nr */
+               if (iter->type != nr->type) {
+                       list_move_tail(&iter->list, &stack);
+                       continue;
+               }
+
+               /* look for the last stack element of same type as @iter */
+               list_for_each_entry_reverse(top, &stack, list)
+                       if (top->type == iter->type)
+                               goto check_overlap;
+
+               list_move_tail(&iter->list, &stack);
+               continue;
+
+check_overlap:
+               top_end = top->start + top->length - 1;
+
+               if (iter->start > top_end + 1) {
+                       list_move_tail(&iter->list, &stack);
                } else {
-                       if (new->type == type) {
-                               phys_addr_t new_start = min(a, start);
-                               phys_addr_t new_end = max(b, end);
-                               int ret;
-
-                               list_del(&entry->list);
-                               entry->start = new_start;
-                               entry->length = new_end - new_start + 1;
-                               ret = iommu_insert_resv_region(entry, regions);
-                               kfree(entry);
-                               return ret;
-                       } else {
-                               pos = pos->next;
-                       }
+                       top->length = max(top_end, iter_end) - top->start + 1;
+                       list_del(&iter->list);
+                       kfree(iter);
                }
        }
-insert:
-       region = iommu_alloc_resv_region(new->start, new->length,
-                                        new->prot, new->type);
-       if (!region)
-               return -ENOMEM;
-
-       list_add_tail(&region->list, pos);
+       list_splice(&stack, regions);
        return 0;
 }
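
The rewritten iommu_insert_resv_region() above keeps the reserved-region list sorted by start address and coalesces overlapping or abutting regions of the same type, while regions of different types are never merged. The sketch below models that merge rule over an already sorted array; it is deliberately simpler than the kernel code, whose temporary stack also lets same-type regions merge across interleaved entries of other types.

#include <stdio.h>

struct resv { unsigned long start, len; int type; };

/* Merge r[j] into the current region when types match and the ranges
 * overlap or abut; otherwise keep r[j] as the next distinct region. */
static int merge_sorted(struct resv *r, int n)
{
        int i = 0, j;

        for (j = 1; j < n; j++) {
                unsigned long end = r[i].start + r[i].len - 1;

                if (r[j].type == r[i].type && r[j].start <= end + 1) {
                        unsigned long jend = r[j].start + r[j].len - 1;

                        if (jend > end)
                                r[i].len = jend - r[i].start + 1;
                } else {
                        r[++i] = r[j];
                }
        }
        return i + 1;
}

int main(void)
{
        struct resv r[] = {
                { 0x1000, 0x1000, 0 },
                { 0x1800, 0x1000, 0 },  /* overlaps previous, same type: merged */
                { 0x3000, 0x1000, 1 },  /* different type: kept separate */
        };
        int i, n = merge_sorted(r, 3);

        for (i = 0; i < n; i++)
                printf("[%#lx, %#lx] type %d\n", r[i].start,
                       r[i].start + r[i].len - 1, r[i].type);
        return 0;
}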
 
@@ -1862,7 +1917,7 @@ EXPORT_SYMBOL_GPL(iommu_map);
 
 static size_t __iommu_unmap(struct iommu_domain *domain,
                            unsigned long iova, size_t size,
-                           bool sync)
+                           struct iommu_iotlb_gather *iotlb_gather)
 {
        const struct iommu_ops *ops = domain->ops;
        size_t unmapped_page, unmapped = 0;
@@ -1899,13 +1954,10 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
        while (unmapped < size) {
                size_t pgsize = iommu_pgsize(domain, iova, size - unmapped);
 
-               unmapped_page = ops->unmap(domain, iova, pgsize);
+               unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather);
                if (!unmapped_page)
                        break;
 
-               if (sync && ops->iotlb_range_add)
-                       ops->iotlb_range_add(domain, iova, pgsize);
-
                pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
                         iova, unmapped_page);
 
@@ -1913,9 +1965,6 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
                unmapped += unmapped_page;
        }
 
-       if (sync && ops->iotlb_sync)
-               ops->iotlb_sync(domain);
-
        trace_unmap(orig_iova, size, unmapped);
        return unmapped;
 }
@@ -1923,14 +1972,22 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 size_t iommu_unmap(struct iommu_domain *domain,
                   unsigned long iova, size_t size)
 {
-       return __iommu_unmap(domain, iova, size, true);
+       struct iommu_iotlb_gather iotlb_gather;
+       size_t ret;
+
+       iommu_iotlb_gather_init(&iotlb_gather);
+       ret = __iommu_unmap(domain, iova, size, &iotlb_gather);
+       iommu_tlb_sync(domain, &iotlb_gather);
+
+       return ret;
 }
 EXPORT_SYMBOL_GPL(iommu_unmap);
 
 size_t iommu_unmap_fast(struct iommu_domain *domain,
-                       unsigned long iova, size_t size)
+                       unsigned long iova, size_t size,
+                       struct iommu_iotlb_gather *iotlb_gather)
 {
-       return __iommu_unmap(domain, iova, size, false);
+       return __iommu_unmap(domain, iova, size, iotlb_gather);
 }
 EXPORT_SYMBOL_GPL(iommu_unmap_fast);
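
iommu_unmap() above shows the intended calling convention: gather, unmap, sync. Callers that tear down many mappings can use iommu_unmap_fast() directly and pay for a single IOTLB invalidation at the end. A hedged caller-side sketch (unmap_many() and its parameters are hypothetical; only the three iommu_* calls come from this patch):

static size_t unmap_many(struct iommu_domain *domain,
                         unsigned long *iovas, size_t *sizes, int n)
{
        struct iommu_iotlb_gather gather;
        size_t unmapped = 0;
        int i;

        iommu_iotlb_gather_init(&gather);

        /* Tear down all the mappings first; TLB ranges are only gathered. */
        for (i = 0; i < n; i++)
                unmapped += iommu_unmap_fast(domain, iovas[i], sizes[i],
                                             &gather);

        /* One IOTLB invalidation for the whole batch. */
        iommu_tlb_sync(domain, &gather);

        return unmapped;
}
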
 
@@ -2143,7 +2200,6 @@ request_default_domain_for_dev(struct device *dev, unsigned long type)
 
        mutex_lock(&group->mutex);
 
-       /* Check if the default domain is already direct mapped */
        ret = 0;
        if (group->default_domain && group->default_domain->type == type)
                goto out;
@@ -2153,7 +2209,6 @@ request_default_domain_for_dev(struct device *dev, unsigned long type)
        if (iommu_group_device_count(group) != 1)
                goto out;
 
-       /* Allocate a direct mapped domain */
        ret = -ENOMEM;
        domain = __iommu_domain_alloc(dev->bus, type);
        if (!domain)
@@ -2168,7 +2223,7 @@ request_default_domain_for_dev(struct device *dev, unsigned long type)
 
        iommu_group_create_direct_mappings(group, dev);
 
-       /* Make the direct mapped domain the default for this group */
+       /* Make the domain the default for this group */
        if (group->default_domain)
                iommu_domain_free(group->default_domain);
        group->default_domain = domain;
@@ -2196,6 +2251,28 @@ int iommu_request_dma_domain_for_dev(struct device *dev)
        return request_default_domain_for_dev(dev, IOMMU_DOMAIN_DMA);
 }
 
+void iommu_set_default_passthrough(bool cmd_line)
+{
+       if (cmd_line)
+               iommu_set_cmd_line_dma_api();
+
+       iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY;
+}
+
+void iommu_set_default_translated(bool cmd_line)
+{
+       if (cmd_line)
+               iommu_set_cmd_line_dma_api();
+
+       iommu_def_domain_type = IOMMU_DOMAIN_DMA;
+}
+
+bool iommu_default_passthrough(void)
+{
+       return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY;
+}
+EXPORT_SYMBOL_GPL(iommu_default_passthrough);
+
 const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
 {
        const struct iommu_ops *ops = NULL;
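
The default-domain helpers added in the hunk above (iommu_set_default_passthrough(), iommu_set_default_translated(), iommu_default_passthrough()) let early boot code pick the default domain type and let DMA setup code query the result later. A hedged sketch of a consumer; the option strings and function names are illustrative, not the real call sites:

/* Somewhere in early arch/IOMMU setup, while parsing an iommu= style option. */
static void example_parse_iommu_param(const char *str)
{
        if (!strcmp(str, "pt"))
                /* Identity-map devices by default; remember it came from the cmdline. */
                iommu_set_default_passthrough(true);
        else if (!strcmp(str, "nopt"))
                iommu_set_default_translated(true);
}

/* Later, DMA code can ask which default won. */
static bool example_wants_passthrough(void)
{
        return iommu_default_passthrough();
}
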
index 3e1a8a6755723a927a7942a7429ab7e6c19a0027..41c605b0058f9615c2dbdd83f1de2404a9b1d255 100644 (file)
@@ -577,7 +577,9 @@ void queue_iova(struct iova_domain *iovad,
 
        spin_unlock_irqrestore(&fq->lock, flags);
 
-       if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0)
+       /* Avoid false sharing as much as possible. */
+       if (!atomic_read(&iovad->fq_timer_on) &&
+           !atomic_cmpxchg(&iovad->fq_timer_on, 0, 1))
                mod_timer(&iovad->fq_timer,
                          jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
 }
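
The queue_iova() change above is the classic read-before-cmpxchg pattern: when the flush-queue timer is already pending, a plain atomic read leaves the cacheline shared instead of having every CPU pull it exclusive just to fail the cmpxchg. A standalone C11 model of that shape (all names are illustrative):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int timer_on;

/* Returns true for the one caller that actually gets to arm the timer. */
static bool maybe_arm_timer(void)
{
        int expected = 0;

        /* Cheap shared read first: most callers bail out here without
         * bouncing the cacheline. */
        if (atomic_load(&timer_on))
                return false;

        /* Only the winner of the 0 -> 1 transition arms the timer. */
        return atomic_compare_exchange_strong(&timer_on, &expected, 1);
}

int main(void)
{
        printf("%d %d\n", maybe_arm_timer(), maybe_arm_timer()); /* prints: 1 0 */
        return 0;
}
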
index ad0098c0c87c7544f18b70c0bd9dc43913ca0478..9da8309f71708f213f91dbe99374681e0da3f652 100644 (file)
@@ -49,6 +49,7 @@ struct ipmmu_features {
        bool setup_imbuscr;
        bool twobit_imttbcr_sl0;
        bool reserved_context;
+       bool cache_snoop;
 };
 
 struct ipmmu_vmsa_device {
@@ -115,45 +116,44 @@ static struct ipmmu_vmsa_device *to_ipmmu(struct device *dev)
 #define IMTTBCR                                0x0008
 #define IMTTBCR_EAE                    (1 << 31)
 #define IMTTBCR_PMB                    (1 << 30)
-#define IMTTBCR_SH1_NON_SHAREABLE      (0 << 28)
-#define IMTTBCR_SH1_OUTER_SHAREABLE    (2 << 28)
-#define IMTTBCR_SH1_INNER_SHAREABLE    (3 << 28)
-#define IMTTBCR_SH1_MASK               (3 << 28)
-#define IMTTBCR_ORGN1_NC               (0 << 26)
-#define IMTTBCR_ORGN1_WB_WA            (1 << 26)
-#define IMTTBCR_ORGN1_WT               (2 << 26)
-#define IMTTBCR_ORGN1_WB               (3 << 26)
-#define IMTTBCR_ORGN1_MASK             (3 << 26)
-#define IMTTBCR_IRGN1_NC               (0 << 24)
-#define IMTTBCR_IRGN1_WB_WA            (1 << 24)
-#define IMTTBCR_IRGN1_WT               (2 << 24)
-#define IMTTBCR_IRGN1_WB               (3 << 24)
-#define IMTTBCR_IRGN1_MASK             (3 << 24)
+#define IMTTBCR_SH1_NON_SHAREABLE      (0 << 28)       /* R-Car Gen2 only */
+#define IMTTBCR_SH1_OUTER_SHAREABLE    (2 << 28)       /* R-Car Gen2 only */
+#define IMTTBCR_SH1_INNER_SHAREABLE    (3 << 28)       /* R-Car Gen2 only */
+#define IMTTBCR_SH1_MASK               (3 << 28)       /* R-Car Gen2 only */
+#define IMTTBCR_ORGN1_NC               (0 << 26)       /* R-Car Gen2 only */
+#define IMTTBCR_ORGN1_WB_WA            (1 << 26)       /* R-Car Gen2 only */
+#define IMTTBCR_ORGN1_WT               (2 << 26)       /* R-Car Gen2 only */
+#define IMTTBCR_ORGN1_WB               (3 << 26)       /* R-Car Gen2 only */
+#define IMTTBCR_ORGN1_MASK             (3 << 26)       /* R-Car Gen2 only */
+#define IMTTBCR_IRGN1_NC               (0 << 24)       /* R-Car Gen2 only */
+#define IMTTBCR_IRGN1_WB_WA            (1 << 24)       /* R-Car Gen2 only */
+#define IMTTBCR_IRGN1_WT               (2 << 24)       /* R-Car Gen2 only */
+#define IMTTBCR_IRGN1_WB               (3 << 24)       /* R-Car Gen2 only */
+#define IMTTBCR_IRGN1_MASK             (3 << 24)       /* R-Car Gen2 only */
 #define IMTTBCR_TSZ1_MASK              (7 << 16)
 #define IMTTBCR_TSZ1_SHIFT             16
-#define IMTTBCR_SH0_NON_SHAREABLE      (0 << 12)
-#define IMTTBCR_SH0_OUTER_SHAREABLE    (2 << 12)
-#define IMTTBCR_SH0_INNER_SHAREABLE    (3 << 12)
-#define IMTTBCR_SH0_MASK               (3 << 12)
-#define IMTTBCR_ORGN0_NC               (0 << 10)
-#define IMTTBCR_ORGN0_WB_WA            (1 << 10)
-#define IMTTBCR_ORGN0_WT               (2 << 10)
-#define IMTTBCR_ORGN0_WB               (3 << 10)
-#define IMTTBCR_ORGN0_MASK             (3 << 10)
-#define IMTTBCR_IRGN0_NC               (0 << 8)
-#define IMTTBCR_IRGN0_WB_WA            (1 << 8)
-#define IMTTBCR_IRGN0_WT               (2 << 8)
-#define IMTTBCR_IRGN0_WB               (3 << 8)
-#define IMTTBCR_IRGN0_MASK             (3 << 8)
+#define IMTTBCR_SH0_NON_SHAREABLE      (0 << 12)       /* R-Car Gen2 only */
+#define IMTTBCR_SH0_OUTER_SHAREABLE    (2 << 12)       /* R-Car Gen2 only */
+#define IMTTBCR_SH0_INNER_SHAREABLE    (3 << 12)       /* R-Car Gen2 only */
+#define IMTTBCR_SH0_MASK               (3 << 12)       /* R-Car Gen2 only */
+#define IMTTBCR_ORGN0_NC               (0 << 10)       /* R-Car Gen2 only */
+#define IMTTBCR_ORGN0_WB_WA            (1 << 10)       /* R-Car Gen2 only */
+#define IMTTBCR_ORGN0_WT               (2 << 10)       /* R-Car Gen2 only */
+#define IMTTBCR_ORGN0_WB               (3 << 10)       /* R-Car Gen2 only */
+#define IMTTBCR_ORGN0_MASK             (3 << 10)       /* R-Car Gen2 only */
+#define IMTTBCR_IRGN0_NC               (0 << 8)        /* R-Car Gen2 only */
+#define IMTTBCR_IRGN0_WB_WA            (1 << 8)        /* R-Car Gen2 only */
+#define IMTTBCR_IRGN0_WT               (2 << 8)        /* R-Car Gen2 only */
+#define IMTTBCR_IRGN0_WB               (3 << 8)        /* R-Car Gen2 only */
+#define IMTTBCR_IRGN0_MASK             (3 << 8)        /* R-Car Gen2 only */
+#define IMTTBCR_SL0_TWOBIT_LVL_3       (0 << 6)        /* R-Car Gen3 only */
+#define IMTTBCR_SL0_TWOBIT_LVL_2       (1 << 6)        /* R-Car Gen3 only */
+#define IMTTBCR_SL0_TWOBIT_LVL_1       (2 << 6)        /* R-Car Gen3 only */
 #define IMTTBCR_SL0_LVL_2              (0 << 4)
 #define IMTTBCR_SL0_LVL_1              (1 << 4)
 #define IMTTBCR_TSZ0_MASK              (7 << 0)
 #define IMTTBCR_TSZ0_SHIFT             0
 
-#define IMTTBCR_SL0_TWOBIT_LVL_3       (0 << 6)
-#define IMTTBCR_SL0_TWOBIT_LVL_2       (1 << 6)
-#define IMTTBCR_SL0_TWOBIT_LVL_1       (2 << 6)
-
 #define IMBUSCR                                0x000c
 #define IMBUSCR_DVM                    (1 << 2)
 #define IMBUSCR_BUSSEL_SYS             (0 << 0)
@@ -361,16 +361,16 @@ static void ipmmu_tlb_flush_all(void *cookie)
        ipmmu_tlb_invalidate(domain);
 }
 
-static void ipmmu_tlb_add_flush(unsigned long iova, size_t size,
-                               size_t granule, bool leaf, void *cookie)
+static void ipmmu_tlb_flush(unsigned long iova, size_t size,
+                               size_t granule, void *cookie)
 {
-       /* The hardware doesn't support selective TLB flush. */
+       ipmmu_tlb_flush_all(cookie);
 }
 
-static const struct iommu_gather_ops ipmmu_gather_ops = {
+static const struct iommu_flush_ops ipmmu_flush_ops = {
        .tlb_flush_all = ipmmu_tlb_flush_all,
-       .tlb_add_flush = ipmmu_tlb_add_flush,
-       .tlb_sync = ipmmu_tlb_flush_all,
+       .tlb_flush_walk = ipmmu_tlb_flush,
+       .tlb_flush_leaf = ipmmu_tlb_flush,
 };
 
 /* -----------------------------------------------------------------------------
@@ -422,17 +422,19 @@ static void ipmmu_domain_setup_context(struct ipmmu_vmsa_domain *domain)
 
        /*
         * TTBCR
-        * We use long descriptors with inner-shareable WBWA tables and allocate
-        * the whole 32-bit VA space to TTBR0.
+        * We use long descriptors and allocate the whole 32-bit VA space to
+        * TTBR0.
         */
        if (domain->mmu->features->twobit_imttbcr_sl0)
                tmp = IMTTBCR_SL0_TWOBIT_LVL_1;
        else
                tmp = IMTTBCR_SL0_LVL_1;
 
-       ipmmu_ctx_write_root(domain, IMTTBCR, IMTTBCR_EAE |
-                            IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA |
-                            IMTTBCR_IRGN0_WB_WA | tmp);
+       if (domain->mmu->features->cache_snoop)
+               tmp |= IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA |
+                      IMTTBCR_IRGN0_WB_WA;
+
+       ipmmu_ctx_write_root(domain, IMTTBCR, IMTTBCR_EAE | tmp);
 
        /* MAIR0 */
        ipmmu_ctx_write_root(domain, IMMAIR0,
@@ -480,7 +482,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
        domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K;
        domain->cfg.ias = 32;
        domain->cfg.oas = 40;
-       domain->cfg.tlb = &ipmmu_gather_ops;
+       domain->cfg.tlb = &ipmmu_flush_ops;
        domain->io_domain.geometry.aperture_end = DMA_BIT_MASK(32);
        domain->io_domain.geometry.force_aperture = true;
        /*
@@ -733,14 +735,14 @@ static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova,
 }
 
 static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova,
-                         size_t size)
+                         size_t size, struct iommu_iotlb_gather *gather)
 {
        struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
 
-       return domain->iop->unmap(domain->iop, iova, size);
+       return domain->iop->unmap(domain->iop, iova, size, gather);
 }
 
-static void ipmmu_iotlb_sync(struct iommu_domain *io_domain)
+static void ipmmu_flush_iotlb_all(struct iommu_domain *io_domain)
 {
        struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
 
@@ -748,6 +750,12 @@ static void ipmmu_iotlb_sync(struct iommu_domain *io_domain)
                ipmmu_tlb_flush_all(domain);
 }
 
+static void ipmmu_iotlb_sync(struct iommu_domain *io_domain,
+                            struct iommu_iotlb_gather *gather)
+{
+       ipmmu_flush_iotlb_all(io_domain);
+}
+
 static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain,
                                      dma_addr_t iova)
 {
@@ -957,7 +965,7 @@ static const struct iommu_ops ipmmu_ops = {
        .detach_dev = ipmmu_detach_device,
        .map = ipmmu_map,
        .unmap = ipmmu_unmap,
-       .flush_iotlb_all = ipmmu_iotlb_sync,
+       .flush_iotlb_all = ipmmu_flush_iotlb_all,
        .iotlb_sync = ipmmu_iotlb_sync,
        .iova_to_phys = ipmmu_iova_to_phys,
        .add_device = ipmmu_add_device,
@@ -988,6 +996,7 @@ static const struct ipmmu_features ipmmu_features_default = {
        .setup_imbuscr = true,
        .twobit_imttbcr_sl0 = false,
        .reserved_context = false,
+       .cache_snoop = true,
 };
 
 static const struct ipmmu_features ipmmu_features_rcar_gen3 = {
@@ -998,6 +1007,7 @@ static const struct ipmmu_features ipmmu_features_rcar_gen3 = {
        .setup_imbuscr = false,
        .twobit_imttbcr_sl0 = true,
        .reserved_context = true,
+       .cache_snoop = false,
 };
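
The new cache_snoop feature bit is what the IMTTBCR hunk above keys off: only when it is set are the inner-shareable / write-back-allocate attributes programmed, so R-Car Gen3 (cache_snoop = false) ends up with just EAE and the SL0 field. A small standalone illustration of the resulting register values, re-using the field encodings from this file (unsigned literals added for portability):

#include <stdio.h>

/* Field encodings as defined in ipmmu-vmsa.c above. */
#define IMTTBCR_EAE                    (1U << 31)
#define IMTTBCR_SH0_INNER_SHAREABLE    (3U << 12)
#define IMTTBCR_ORGN0_WB_WA            (1U << 10)
#define IMTTBCR_IRGN0_WB_WA            (1U << 8)
#define IMTTBCR_SL0_TWOBIT_LVL_1       (2U << 6)
#define IMTTBCR_SL0_LVL_1              (1U << 4)

static unsigned int imttbcr(int twobit_sl0, int cache_snoop)
{
        unsigned int tmp = twobit_sl0 ? IMTTBCR_SL0_TWOBIT_LVL_1
                                      : IMTTBCR_SL0_LVL_1;

        if (cache_snoop)
                tmp |= IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA |
                       IMTTBCR_IRGN0_WB_WA;

        return IMTTBCR_EAE | tmp;
}

int main(void)
{
        printf("Gen2: %#x\n", imttbcr(0, 1)); /* EAE | SH0/ORGN0/IRGN0 | SL0_LVL_1 */
        printf("Gen3: %#x\n", imttbcr(1, 0)); /* EAE | SL0_TWOBIT_LVL_1 only */
        return 0;
}
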
 
 static const struct of_device_id ipmmu_of_ids[] = {
index b25e2eb9e038d9460a94b487d66c718693f3c8cc..be99d408cf35dc7b8d26612b979d4c584de64c4a 100644 (file)
@@ -168,20 +168,29 @@ fail:
        return;
 }
 
-static void __flush_iotlb_sync(void *cookie)
+static void __flush_iotlb_walk(unsigned long iova, size_t size,
+                              size_t granule, void *cookie)
 {
-       /*
-        * Nothing is needed here, the barrier to guarantee
-        * completion of the tlb sync operation is implicitly
-        * taken care when the iommu client does a writel before
-        * kick starting the other master.
-        */
+       __flush_iotlb_range(iova, size, granule, false, cookie);
+}
+
+static void __flush_iotlb_leaf(unsigned long iova, size_t size,
+                              size_t granule, void *cookie)
+{
+       __flush_iotlb_range(iova, size, granule, true, cookie);
 }
 
-static const struct iommu_gather_ops msm_iommu_gather_ops = {
+static void __flush_iotlb_page(struct iommu_iotlb_gather *gather,
+                              unsigned long iova, size_t granule, void *cookie)
+{
+       __flush_iotlb_range(iova, granule, granule, true, cookie);
+}
+
+static const struct iommu_flush_ops msm_iommu_flush_ops = {
        .tlb_flush_all = __flush_iotlb,
-       .tlb_add_flush = __flush_iotlb_range,
-       .tlb_sync = __flush_iotlb_sync,
+       .tlb_flush_walk = __flush_iotlb_walk,
+       .tlb_flush_leaf = __flush_iotlb_leaf,
+       .tlb_add_page = __flush_iotlb_page,
 };
 
 static int msm_iommu_alloc_ctx(unsigned long *map, int start, int end)
@@ -345,7 +354,7 @@ static int msm_iommu_domain_config(struct msm_priv *priv)
                .pgsize_bitmap = msm_iommu_ops.pgsize_bitmap,
                .ias = 32,
                .oas = 32,
-               .tlb = &msm_iommu_gather_ops,
+               .tlb = &msm_iommu_flush_ops,
                .iommu_dev = priv->dev,
        };
 
@@ -509,13 +518,13 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova,
 }
 
 static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
-                             size_t len)
+                             size_t len, struct iommu_iotlb_gather *gather)
 {
        struct msm_priv *priv = to_msm_priv(domain);
        unsigned long flags;
 
        spin_lock_irqsave(&priv->pgtlock, flags);
-       len = priv->iop->unmap(priv->iop, iova, len);
+       len = priv->iop->unmap(priv->iop, iova, len, gather);
        spin_unlock_irqrestore(&priv->pgtlock, flags);
 
        return len;
@@ -691,6 +700,13 @@ static struct iommu_ops msm_iommu_ops = {
        .detach_dev = msm_iommu_detach_dev,
        .map = msm_iommu_map,
        .unmap = msm_iommu_unmap,
+       /*
+        * Nothing is needed here: the barrier that guarantees
+        * completion of the TLB sync operation is implicitly
+        * taken care of when the IOMMU client does a writel before
+        * kick-starting the other master.
+        */
+       .iotlb_sync = NULL,
        .iova_to_phys = msm_iommu_iova_to_phys,
        .add_device = msm_iommu_add_device,
        .remove_device = msm_iommu_remove_device,
@@ -750,7 +766,6 @@ static int msm_iommu_probe(struct platform_device *pdev)
 
        iommu->irq = platform_get_irq(pdev, 0);
        if (iommu->irq < 0) {
-               dev_err(iommu->dev, "could not get iommu irq\n");
                ret = -ENODEV;
                goto fail;
        }
index 82e4be4dfdaf84d17cc662fb7db2708cbeaf197c..67a483c1a9357c7629d569ae1456ca8181b92c89 100644 (file)
@@ -28,6 +28,7 @@
 #include "mtk_iommu.h"
 
 #define REG_MMU_PT_BASE_ADDR                   0x000
+#define MMU_PT_ADDR_MASK                       GENMASK(31, 7)
 
 #define REG_MMU_INVALIDATE                     0x020
 #define F_ALL_INVLD                            0x2
 #define REG_MMU_DCM_DIS                                0x050
 
 #define REG_MMU_CTRL_REG                       0x110
+#define F_MMU_TF_PROT_TO_PROGRAM_ADDR          (2 << 4)
 #define F_MMU_PREFETCH_RT_REPLACE_MOD          BIT(4)
-#define F_MMU_TF_PROTECT_SEL_SHIFT(data) \
-       ((data)->m4u_plat == M4U_MT2712 ? 4 : 5)
-/* It's named by F_MMU_TF_PROT_SEL in mt2712. */
-#define F_MMU_TF_PROTECT_SEL(prot, data) \
-       (((prot) & 0x3) << F_MMU_TF_PROTECT_SEL_SHIFT(data))
+#define F_MMU_TF_PROT_TO_PROGRAM_ADDR_MT8173   (2 << 5)
 
 #define REG_MMU_IVRP_PADDR                     0x114
 
 #define F_INT_CLR_BIT                          BIT(12)
 
 #define REG_MMU_INT_MAIN_CONTROL               0x124
-#define F_INT_TRANSLATION_FAULT                        BIT(0)
-#define F_INT_MAIN_MULTI_HIT_FAULT             BIT(1)
-#define F_INT_INVALID_PA_FAULT                 BIT(2)
-#define F_INT_ENTRY_REPLACEMENT_FAULT          BIT(3)
-#define F_INT_TLB_MISS_FAULT                   BIT(4)
-#define F_INT_MISS_TRANSACTION_FIFO_FAULT      BIT(5)
-#define F_INT_PRETETCH_TRANSATION_FIFO_FAULT   BIT(6)
+                                               /* mmu0 | mmu1 */
+#define F_INT_TRANSLATION_FAULT                        (BIT(0) | BIT(7))
+#define F_INT_MAIN_MULTI_HIT_FAULT             (BIT(1) | BIT(8))
+#define F_INT_INVALID_PA_FAULT                 (BIT(2) | BIT(9))
+#define F_INT_ENTRY_REPLACEMENT_FAULT          (BIT(3) | BIT(10))
+#define F_INT_TLB_MISS_FAULT                   (BIT(4) | BIT(11))
+#define F_INT_MISS_TRANSACTION_FIFO_FAULT      (BIT(5) | BIT(12))
+#define F_INT_PRETETCH_TRANSATION_FIFO_FAULT   (BIT(6) | BIT(13))
 
 #define REG_MMU_CPE_DONE                       0x12C
 
 #define REG_MMU_FAULT_ST1                      0x134
+#define F_REG_MMU0_FAULT_MASK                  GENMASK(6, 0)
+#define F_REG_MMU1_FAULT_MASK                  GENMASK(13, 7)
 
-#define REG_MMU_FAULT_VA                       0x13c
+#define REG_MMU0_FAULT_VA                      0x13c
 #define F_MMU_FAULT_VA_WRITE_BIT               BIT(1)
 #define F_MMU_FAULT_VA_LAYER_BIT               BIT(0)
 
-#define REG_MMU_INVLD_PA                       0x140
-#define REG_MMU_INT_ID                         0x150
-#define F_MMU0_INT_ID_LARB_ID(a)               (((a) >> 7) & 0x7)
-#define F_MMU0_INT_ID_PORT_ID(a)               (((a) >> 2) & 0x1f)
+#define REG_MMU0_INVLD_PA                      0x140
+#define REG_MMU1_FAULT_VA                      0x144
+#define REG_MMU1_INVLD_PA                      0x148
+#define REG_MMU0_INT_ID                                0x150
+#define REG_MMU1_INT_ID                                0x154
+#define F_MMU_INT_ID_LARB_ID(a)                        (((a) >> 7) & 0x7)
+#define F_MMU_INT_ID_PORT_ID(a)                        (((a) >> 2) & 0x1f)
 
 #define MTK_PROTECT_PA_ALIGN                   128
 
@@ -107,6 +111,30 @@ struct mtk_iommu_domain {
 
 static const struct iommu_ops mtk_iommu_ops;
 
+/*
+ * In M4U 4GB mode, the physical address is remapped as below:
+ *
+ * CPU Physical address:
+ * ====================
+ *
+ * 0      1G       2G     3G       4G     5G
+ * |---A---|---B---|---C---|---D---|---E---|
+ * +--I/O--+------------Memory-------------+
+ *
+ * IOMMU output physical address:
+ *  =============================
+ *
+ *                                 4G      5G     6G      7G      8G
+ *                                 |---E---|---B---|---C---|---D---|
+ *                                 +------------Memory-------------+
+ *
+ * Region 'A' (I/O) can NOT be mapped by the M4U. For regions 'B'/'C'/'D',
+ * bit 32 of the CPU physical address must always be set, while for region
+ * 'E' the CPU physical address is kept as is.
+ * Additionally, the IOMMU consumers always use the CPU physical address.
+ */
+#define MTK_IOMMU_4GB_MODE_REMAP_BASE   0x140000000UL
+
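
Given the remapping described in the comment above, the later changes to mtk_iommu_map() and mtk_iommu_iova_to_phys() are a pair of inverse transforms: set bit 32 on the way in, and clear it on the way out only for output addresses at or above MTK_IOMMU_4GB_MODE_REMAP_BASE, so region 'E' round-trips unchanged. A quick standalone check of that arithmetic (the helper names are mine, not the driver's):

#include <stdio.h>
#include <stdint.h>

#define BIT_ULL(n)                     (1ULL << (n))
#define MTK_IOMMU_4GB_MODE_REMAP_BASE  0x140000000ULL

/* CPU PA -> IOMMU output PA, as done in mtk_iommu_map() when enable_4GB. */
static uint64_t cpu_to_m4u(uint64_t pa)
{
        return pa | BIT_ULL(32);
}

/* IOMMU output PA -> CPU PA, as done in mtk_iommu_iova_to_phys(). */
static uint64_t m4u_to_cpu(uint64_t pa)
{
        if (pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE)
                pa &= ~BIT_ULL(32);
        return pa;
}

int main(void)
{
        /* Region 'B' (1G..2G) and region 'E' (4G..5G) both round-trip. */
        uint64_t b = 0x40000000ULL, e = 0x100000000ULL;

        printf("B: %#llx -> %#llx -> %#llx\n",
               (unsigned long long)b,
               (unsigned long long)cpu_to_m4u(b),
               (unsigned long long)m4u_to_cpu(cpu_to_m4u(b)));
        printf("E: %#llx -> %#llx -> %#llx\n",
               (unsigned long long)e,
               (unsigned long long)cpu_to_m4u(e),
               (unsigned long long)m4u_to_cpu(cpu_to_m4u(e)));
        return 0;
}
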
 static LIST_HEAD(m4ulist);     /* List all the M4U HWs */
 
 #define for_each_m4u(data)     list_for_each_entry(data, &m4ulist, list)
@@ -188,10 +216,32 @@ static void mtk_iommu_tlb_sync(void *cookie)
        }
 }
 
-static const struct iommu_gather_ops mtk_iommu_gather_ops = {
+static void mtk_iommu_tlb_flush_walk(unsigned long iova, size_t size,
+                                    size_t granule, void *cookie)
+{
+       mtk_iommu_tlb_add_flush_nosync(iova, size, granule, false, cookie);
+       mtk_iommu_tlb_sync(cookie);
+}
+
+static void mtk_iommu_tlb_flush_leaf(unsigned long iova, size_t size,
+                                    size_t granule, void *cookie)
+{
+       mtk_iommu_tlb_add_flush_nosync(iova, size, granule, true, cookie);
+       mtk_iommu_tlb_sync(cookie);
+}
+
+static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather,
+                                           unsigned long iova, size_t granule,
+                                           void *cookie)
+{
+       mtk_iommu_tlb_add_flush_nosync(iova, granule, granule, true, cookie);
+}
+
+static const struct iommu_flush_ops mtk_iommu_flush_ops = {
        .tlb_flush_all = mtk_iommu_tlb_flush_all,
-       .tlb_add_flush = mtk_iommu_tlb_add_flush_nosync,
-       .tlb_sync = mtk_iommu_tlb_sync,
+       .tlb_flush_walk = mtk_iommu_tlb_flush_walk,
+       .tlb_flush_leaf = mtk_iommu_tlb_flush_leaf,
+       .tlb_add_page = mtk_iommu_tlb_flush_page_nosync,
 };
 
 static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
@@ -204,13 +254,21 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
 
        /* Read error info from registers */
        int_state = readl_relaxed(data->base + REG_MMU_FAULT_ST1);
-       fault_iova = readl_relaxed(data->base + REG_MMU_FAULT_VA);
+       if (int_state & F_REG_MMU0_FAULT_MASK) {
+               regval = readl_relaxed(data->base + REG_MMU0_INT_ID);
+               fault_iova = readl_relaxed(data->base + REG_MMU0_FAULT_VA);
+               fault_pa = readl_relaxed(data->base + REG_MMU0_INVLD_PA);
+       } else {
+               regval = readl_relaxed(data->base + REG_MMU1_INT_ID);
+               fault_iova = readl_relaxed(data->base + REG_MMU1_FAULT_VA);
+               fault_pa = readl_relaxed(data->base + REG_MMU1_INVLD_PA);
+       }
        layer = fault_iova & F_MMU_FAULT_VA_LAYER_BIT;
        write = fault_iova & F_MMU_FAULT_VA_WRITE_BIT;
-       fault_pa = readl_relaxed(data->base + REG_MMU_INVLD_PA);
-       regval = readl_relaxed(data->base + REG_MMU_INT_ID);
-       fault_larb = F_MMU0_INT_ID_LARB_ID(regval);
-       fault_port = F_MMU0_INT_ID_PORT_ID(regval);
+       fault_larb = F_MMU_INT_ID_LARB_ID(regval);
+       fault_port = F_MMU_INT_ID_PORT_ID(regval);
+
+       fault_larb = data->plat_data->larbid_remap[fault_larb];
 
        if (report_iommu_fault(&dom->domain, data->dev, fault_iova,
                               write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) {
@@ -242,7 +300,7 @@ static void mtk_iommu_config(struct mtk_iommu_data *data,
        for (i = 0; i < fwspec->num_ids; ++i) {
                larbid = MTK_M4U_TO_LARB(fwspec->ids[i]);
                portid = MTK_M4U_TO_PORT(fwspec->ids[i]);
-               larb_mmu = &data->smi_imu.larb_imu[larbid];
+               larb_mmu = &data->larb_imu[larbid];
 
                dev_dbg(dev, "%s iommu port: %d\n",
                        enable ? "enable" : "disable", portid);
@@ -263,17 +321,15 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
        dom->cfg = (struct io_pgtable_cfg) {
                .quirks = IO_PGTABLE_QUIRK_ARM_NS |
                        IO_PGTABLE_QUIRK_NO_PERMS |
-                       IO_PGTABLE_QUIRK_TLBI_ON_MAP,
+                       IO_PGTABLE_QUIRK_TLBI_ON_MAP |
+                       IO_PGTABLE_QUIRK_ARM_MTK_EXT,
                .pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
                .ias = 32,
-               .oas = 32,
-               .tlb = &mtk_iommu_gather_ops,
+               .oas = 34,
+               .tlb = &mtk_iommu_flush_ops,
                .iommu_dev = data->dev,
        };
 
-       if (data->enable_4GB)
-               dom->cfg.quirks |= IO_PGTABLE_QUIRK_ARM_MTK_4GB;
-
        dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data);
        if (!dom->iop) {
                dev_err(data->dev, "Failed to alloc io pgtable\n");
@@ -336,7 +392,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain,
        /* Update the pgtable base address register of the M4U HW */
        if (!data->m4u_dom) {
                data->m4u_dom = dom;
-               writel(dom->cfg.arm_v7s_cfg.ttbr[0],
+               writel(dom->cfg.arm_v7s_cfg.ttbr[0] & MMU_PT_ADDR_MASK,
                       data->base + REG_MMU_PT_BASE_ADDR);
        }
 
@@ -359,32 +415,43 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
                         phys_addr_t paddr, size_t size, int prot)
 {
        struct mtk_iommu_domain *dom = to_mtk_domain(domain);
+       struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
        unsigned long flags;
        int ret;
 
+       /* The "4GB mode" M4U physically can not use the lower remap of Dram. */
+       if (data->enable_4GB)
+               paddr |= BIT_ULL(32);
+
        spin_lock_irqsave(&dom->pgtlock, flags);
-       ret = dom->iop->map(dom->iop, iova, paddr & DMA_BIT_MASK(32),
-                           size, prot);
+       ret = dom->iop->map(dom->iop, iova, paddr, size, prot);
        spin_unlock_irqrestore(&dom->pgtlock, flags);
 
        return ret;
 }
 
 static size_t mtk_iommu_unmap(struct iommu_domain *domain,
-                             unsigned long iova, size_t size)
+                             unsigned long iova, size_t size,
+                             struct iommu_iotlb_gather *gather)
 {
        struct mtk_iommu_domain *dom = to_mtk_domain(domain);
        unsigned long flags;
        size_t unmapsz;
 
        spin_lock_irqsave(&dom->pgtlock, flags);
-       unmapsz = dom->iop->unmap(dom->iop, iova, size);
+       unmapsz = dom->iop->unmap(dom->iop, iova, size, gather);
        spin_unlock_irqrestore(&dom->pgtlock, flags);
 
        return unmapsz;
 }
 
-static void mtk_iommu_iotlb_sync(struct iommu_domain *domain)
+static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain)
+{
+       mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
+}
+
+static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
+                                struct iommu_iotlb_gather *gather)
 {
        mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
 }
@@ -401,8 +468,8 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
        pa = dom->iop->iova_to_phys(dom->iop, iova);
        spin_unlock_irqrestore(&dom->pgtlock, flags);
 
-       if (data->enable_4GB)
-               pa |= BIT_ULL(32);
+       if (data->enable_4GB && pa >= MTK_IOMMU_4GB_MODE_REMAP_BASE)
+               pa &= ~BIT_ULL(32);
 
        return pa;
 }
@@ -490,7 +557,7 @@ static const struct iommu_ops mtk_iommu_ops = {
        .detach_dev     = mtk_iommu_detach_device,
        .map            = mtk_iommu_map,
        .unmap          = mtk_iommu_unmap,
-       .flush_iotlb_all = mtk_iommu_iotlb_sync,
+       .flush_iotlb_all = mtk_iommu_flush_iotlb_all,
        .iotlb_sync     = mtk_iommu_iotlb_sync,
        .iova_to_phys   = mtk_iommu_iova_to_phys,
        .add_device     = mtk_iommu_add_device,
@@ -511,9 +578,11 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
                return ret;
        }
 
-       regval = F_MMU_TF_PROTECT_SEL(2, data);
-       if (data->m4u_plat == M4U_MT8173)
-               regval |= F_MMU_PREFETCH_RT_REPLACE_MOD;
+       if (data->plat_data->m4u_plat == M4U_MT8173)
+               regval = F_MMU_PREFETCH_RT_REPLACE_MOD |
+                        F_MMU_TF_PROT_TO_PROGRAM_ADDR_MT8173;
+       else
+               regval = F_MMU_TF_PROT_TO_PROGRAM_ADDR;
        writel_relaxed(regval, data->base + REG_MMU_CTRL_REG);
 
        regval = F_L2_MULIT_HIT_EN |
@@ -533,14 +602,14 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
                F_INT_PRETETCH_TRANSATION_FIFO_FAULT;
        writel_relaxed(regval, data->base + REG_MMU_INT_MAIN_CONTROL);
 
-       if (data->m4u_plat == M4U_MT8173)
+       if (data->plat_data->m4u_plat == M4U_MT8173)
                regval = (data->protect_base >> 1) | (data->enable_4GB << 31);
        else
                regval = lower_32_bits(data->protect_base) |
                         upper_32_bits(data->protect_base);
        writel_relaxed(regval, data->base + REG_MMU_IVRP_PADDR);
 
-       if (data->enable_4GB && data->m4u_plat != M4U_MT8173) {
+       if (data->enable_4GB && data->plat_data->has_vld_pa_rng) {
                /*
                 * If 4GB mode is enabled, the validate PA range is from
                 * 0x1_0000_0000 to 0x1_ffff_ffff. here record bit[32:30].
@@ -550,8 +619,7 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
        }
        writel_relaxed(0, data->base + REG_MMU_DCM_DIS);
 
-       /* It's MISC control register whose default value is ok except mt8173.*/
-       if (data->m4u_plat == M4U_MT8173)
+       if (data->plat_data->reset_axi)
                writel_relaxed(0, data->base + REG_MMU_STANDARD_AXI_MODE);
 
        if (devm_request_irq(data->dev, data->irq, mtk_iommu_isr, 0,
@@ -584,7 +652,7 @@ static int mtk_iommu_probe(struct platform_device *pdev)
        if (!data)
                return -ENOMEM;
        data->dev = dev;
-       data->m4u_plat = (enum mtk_iommu_plat)of_device_get_match_data(dev);
+       data->plat_data = of_device_get_match_data(dev);
 
        /* Protect memory. HW will access here while translation fault.*/
        protect = devm_kzalloc(dev, MTK_PROTECT_PA_ALIGN * 2, GFP_KERNEL);
@@ -594,6 +662,8 @@ static int mtk_iommu_probe(struct platform_device *pdev)
 
        /* Whether the current dram is over 4GB */
        data->enable_4GB = !!(max_pfn > (BIT_ULL(32) >> PAGE_SHIFT));
+       if (!data->plat_data->has_4gb_mode)
+               data->enable_4GB = false;
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        data->base = devm_ioremap_resource(dev, res);
@@ -605,15 +675,16 @@ static int mtk_iommu_probe(struct platform_device *pdev)
        if (data->irq < 0)
                return data->irq;
 
-       data->bclk = devm_clk_get(dev, "bclk");
-       if (IS_ERR(data->bclk))
-               return PTR_ERR(data->bclk);
+       if (data->plat_data->has_bclk) {
+               data->bclk = devm_clk_get(dev, "bclk");
+               if (IS_ERR(data->bclk))
+                       return PTR_ERR(data->bclk);
+       }
 
        larb_nr = of_count_phandle_with_args(dev->of_node,
                                             "mediatek,larbs", NULL);
        if (larb_nr < 0)
                return larb_nr;
-       data->smi_imu.larb_nr = larb_nr;
 
        for (i = 0; i < larb_nr; i++) {
                struct device_node *larbnode;
@@ -638,7 +709,7 @@ static int mtk_iommu_probe(struct platform_device *pdev)
                        of_node_put(larbnode);
                        return -EPROBE_DEFER;
                }
-               data->smi_imu.larb_imu[id].dev = &plarbdev->dev;
+               data->larb_imu[id].dev = &plarbdev->dev;
 
                component_match_add_release(dev, &match, release_of,
                                            compare_of, larbnode);
@@ -699,6 +770,7 @@ static int __maybe_unused mtk_iommu_suspend(struct device *dev)
        reg->int_control0 = readl_relaxed(base + REG_MMU_INT_CONTROL0);
        reg->int_main_control = readl_relaxed(base + REG_MMU_INT_MAIN_CONTROL);
        reg->ivrp_paddr = readl_relaxed(base + REG_MMU_IVRP_PADDR);
+       reg->vld_pa_rng = readl_relaxed(base + REG_MMU_VLD_PA_RNG);
        clk_disable_unprepare(data->bclk);
        return 0;
 }
@@ -707,6 +779,7 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev)
 {
        struct mtk_iommu_data *data = dev_get_drvdata(dev);
        struct mtk_iommu_suspend_reg *reg = &data->reg;
+       struct mtk_iommu_domain *m4u_dom = data->m4u_dom;
        void __iomem *base = data->base;
        int ret;
 
@@ -722,8 +795,9 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev)
        writel_relaxed(reg->int_control0, base + REG_MMU_INT_CONTROL0);
        writel_relaxed(reg->int_main_control, base + REG_MMU_INT_MAIN_CONTROL);
        writel_relaxed(reg->ivrp_paddr, base + REG_MMU_IVRP_PADDR);
-       if (data->m4u_dom)
-               writel(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0],
+       writel_relaxed(reg->vld_pa_rng, base + REG_MMU_VLD_PA_RNG);
+       if (m4u_dom)
+               writel(m4u_dom->cfg.arm_v7s_cfg.ttbr[0] & MMU_PT_ADDR_MASK,
                       base + REG_MMU_PT_BASE_ADDR);
        return 0;
 }
@@ -732,9 +806,32 @@ static const struct dev_pm_ops mtk_iommu_pm_ops = {
        SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(mtk_iommu_suspend, mtk_iommu_resume)
 };
 
+static const struct mtk_iommu_plat_data mt2712_data = {
+       .m4u_plat     = M4U_MT2712,
+       .has_4gb_mode = true,
+       .has_bclk     = true,
+       .has_vld_pa_rng   = true,
+       .larbid_remap = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
+};
+
+static const struct mtk_iommu_plat_data mt8173_data = {
+       .m4u_plat     = M4U_MT8173,
+       .has_4gb_mode = true,
+       .has_bclk     = true,
+       .reset_axi    = true,
+       .larbid_remap = {0, 1, 2, 3, 4, 5}, /* Linear mapping. */
+};
+
+static const struct mtk_iommu_plat_data mt8183_data = {
+       .m4u_plat     = M4U_MT8183,
+       .reset_axi    = true,
+       .larbid_remap = {0, 4, 5, 6, 7, 2, 3, 1},
+};
+
 static const struct of_device_id mtk_iommu_of_ids[] = {
-       { .compatible = "mediatek,mt2712-m4u", .data = (void *)M4U_MT2712},
-       { .compatible = "mediatek,mt8173-m4u", .data = (void *)M4U_MT8173},
+       { .compatible = "mediatek,mt2712-m4u", .data = &mt2712_data},
+       { .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data},
+       { .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data},
        {}
 };
 
index 59337323db58e2039b948ae5faca3440533215e1..fc0f16eabacdfa2340677a6db385ffa8fd94c5db 100644 (file)
@@ -24,12 +24,25 @@ struct mtk_iommu_suspend_reg {
        u32                             int_control0;
        u32                             int_main_control;
        u32                             ivrp_paddr;
+       u32                             vld_pa_rng;
 };
 
 enum mtk_iommu_plat {
        M4U_MT2701,
        M4U_MT2712,
        M4U_MT8173,
+       M4U_MT8183,
+};
+
+struct mtk_iommu_plat_data {
+       enum mtk_iommu_plat m4u_plat;
+       bool                has_4gb_mode;
+
+       /* HW will use the EMI clock if the "bclk" is not present. */
+       bool                has_bclk;
+       bool                has_vld_pa_rng;
+       bool                reset_axi;
+       unsigned char       larbid_remap[MTK_LARB_NR_MAX];
 };
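
struct mtk_iommu_plat_data turns the former per-SoC m4u_plat checks into data: probe looks the structure up via of_device_get_match_data() and the rest of the driver tests its flags, while larbid_remap translates the larb index reported in REG_MMUx_INT_ID into the larb id used by software. A trimmed-down, standalone model of that pattern (the struct and flag handling are illustrative; the mt8183 remap table matches the one added in mtk_iommu.c):

#include <stdbool.h>
#include <stdio.h>

/* Cut-down model of struct mtk_iommu_plat_data (illustrative only). */
struct plat_data {
        const char    *name;
        bool           has_bclk;
        bool           reset_axi;
        unsigned char  larbid_remap[10];
};

static const struct plat_data mt8183 = {
        .name         = "mt8183",
        .reset_axi    = true,
        .larbid_remap = {0, 4, 5, 6, 7, 2, 3, 1},
};

int main(void)
{
        unsigned int hw_larb = 5; /* larb index as reported in REG_MMUx_INT_ID */

        if (!mt8183.has_bclk)
                printf("%s: no bclk, HW runs from the EMI clock\n", mt8183.name);
        if (mt8183.reset_axi)
                printf("%s: clear REG_MMU_STANDARD_AXI_MODE at init\n", mt8183.name);

        printf("%s: fault in hw larb %u -> larb %u\n",
               mt8183.name, hw_larb, mt8183.larbid_remap[hw_larb]);
        return 0;
}
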
 
 struct mtk_iommu_domain;
@@ -43,14 +56,14 @@ struct mtk_iommu_data {
        struct mtk_iommu_suspend_reg    reg;
        struct mtk_iommu_domain         *m4u_dom;
        struct iommu_group              *m4u_group;
-       struct mtk_smi_iommu            smi_imu;      /* SMI larb iommu info */
        bool                            enable_4GB;
        bool                            tlb_flush_active;
 
        struct iommu_device             iommu;
-       enum mtk_iommu_plat             m4u_plat;
+       const struct mtk_iommu_plat_data *plat_data;
 
        struct list_head                list;
+       struct mtk_smi_larb_iommu       larb_imu[MTK_LARB_NR_MAX];
 };
 
 static inline int compare_of(struct device *dev, void *data)
@@ -67,14 +80,14 @@ static inline int mtk_iommu_bind(struct device *dev)
 {
        struct mtk_iommu_data *data = dev_get_drvdata(dev);
 
-       return component_bind_all(dev, &data->smi_imu);
+       return component_bind_all(dev, &data->larb_imu);
 }
 
 static inline void mtk_iommu_unbind(struct device *dev)
 {
        struct mtk_iommu_data *data = dev_get_drvdata(dev);
 
-       component_unbind_all(dev, &data->smi_imu);
+       component_unbind_all(dev, &data->larb_imu);
 }
 
 #endif
index abeeac488372c8e5735f8838deb0ed7f5eef0eca..210b1c7c0bdae4202766dca020dda1159d6917da 100644 (file)
@@ -206,7 +206,7 @@ static void mtk_iommu_config(struct mtk_iommu_data *data,
        for (i = 0; i < fwspec->num_ids; ++i) {
                larbid = mt2701_m4u_to_larb(fwspec->ids[i]);
                portid = mt2701_m4u_to_port(fwspec->ids[i]);
-               larb_mmu = &data->smi_imu.larb_imu[larbid];
+               larb_mmu = &data->larb_imu[larbid];
 
                dev_dbg(dev, "%s iommu port: %d\n",
                        enable ? "enable" : "disable", portid);
@@ -324,7 +324,8 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
 }
 
 static size_t mtk_iommu_unmap(struct iommu_domain *domain,
-                             unsigned long iova, size_t size)
+                             unsigned long iova, size_t size,
+                             struct iommu_iotlb_gather *gather)
 {
        struct mtk_iommu_domain *dom = to_mtk_domain(domain);
        unsigned long flags;
@@ -610,14 +611,12 @@ static int mtk_iommu_probe(struct platform_device *pdev)
                        }
                }
 
-               data->smi_imu.larb_imu[larb_nr].dev = &plarbdev->dev;
+               data->larb_imu[larb_nr].dev = &plarbdev->dev;
                component_match_add_release(dev, &match, release_of,
                                            compare_of, larb_spec.np);
                larb_nr++;
        }
 
-       data->smi_imu.larb_nr = larb_nr;
-
        platform_set_drvdata(pdev, data);
 
        ret = mtk_iommu_hw_init(data);