Merge git://git.infradead.org/intel-iommu
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 6 Nov 2015 00:06:52 +0000 (16:06 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 6 Nov 2015 00:06:52 +0000 (16:06 -0800)
Pull intel iommu updates from David Woodhouse:
 "This adds "Shared Virtual Memory" (aka PASID support) for the Intel
  IOMMU.  This allows devices to do DMA using process address space,
  translated through the normal CPU page tables for the relevant mm.

  With corresponding support added to the i915 driver, this has been
  tested with the graphics device on Skylake.  We don't have the
  required TLP support in our PCIe root ports for supporting discrete
  devices yet, so it's only integrated devices that can do it so far"
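
For a sense of the driver-facing side, the API added by this series lives in
include/linux/intel-svm.h. Below is a minimal, hedged sketch of how a driver
might bind the current process's address space and receive fault callbacks.
intel_svm_bind_mm(), intel_svm_unbind_mm() and struct svm_dev_ops come from
this series; the my_* names and the surrounding driver are hypothetical:

    #include <linux/intel-svm.h>

    /* Hypothetical driver callback: invoked when the IOMMU reports a
     * fault on this device that page request handling could not service. */
    static void my_svm_fault_cb(struct device *dev, int pasid, u64 address,
                                u32 private, int rwxp, int response)
    {
            dev_warn(dev, "SVM fault at %llx, PASID %d\n", address, pasid);
    }

    static struct svm_dev_ops my_svm_ops = {
            .fault_cb = my_svm_fault_cb,
    };

    /* Bind current->mm and obtain a PASID; DMA tagged with that PASID is
     * translated through the process page tables. */
    static int my_bind_current_mm(struct device *dev, int *pasid)
    {
            return intel_svm_bind_mm(dev, pasid, 0, &my_svm_ops);
    }

The binding is dropped again with intel_svm_unbind_mm(dev, pasid) once the
device has finished using the address space.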

* git://git.infradead.org/intel-iommu: (23 commits)
  iommu/vt-d: Fix rwxp flags in SVM device fault callback
  iommu/vt-d: Expose struct svm_dev_ops without CONFIG_INTEL_IOMMU_SVM
  iommu/vt-d: Clean up pasid_enabled() and ecs_enabled() dependencies
  iommu/vt-d: Handle Caching Mode implementations of SVM
  iommu/vt-d: Fix SVM IOTLB flush handling
  iommu/vt-d: Use dev_err(..) in intel_svm_device_to_iommu(..)
  iommu/vt-d: fix a loop in prq_event_thread()
  iommu/vt-d: Fix IOTLB flushing for global pages
  iommu/vt-d: Fix address shifting in page request handler
  iommu/vt-d: shift wrapping bug in prq_event_thread()
  iommu/vt-d: Fix NULL pointer dereference in page request error case
  iommu/vt-d: Implement SVM_FLAG_SUPERVISOR_MODE for kernel access
  iommu/vt-d: Implement SVM_FLAG_PRIVATE_PASID to allocate unique PASIDs
  iommu/vt-d: Add callback to device driver on page faults
  iommu/vt-d: Implement page request handling
  iommu/vt-d: Generalise DMAR MSI setup to allow for page request events
  iommu/vt-d: Implement deferred invalidate for SVM
  iommu/vt-d: Add basic SVM PASID support
  iommu/vt-d: Always enable PASID/PRI PCI capabilities before ATS
  iommu/vt-d: Add initial support for PASID tables
  ...
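
Two of the flags named in the commits above are worth a note. In a hedged
sketch reusing the hypothetical device from the example further up:
SVM_FLAG_PRIVATE_PASID requests a PASID that is not shared with other
bindings of the same mm, and SVM_FLAG_SUPERVISOR_MODE binds init_mm so that
a trusted driver can perform DMA on kernel addresses:

    int pasid, ret;

    /* Kernel-mode PASID: requests are translated via init_mm */
    ret = intel_svm_bind_mm(dev, &pasid, SVM_FLAG_SUPERVISOR_MODE, NULL);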

drivers/iommu/Kconfig
drivers/iommu/intel-iommu.c

diff --combined drivers/iommu/Kconfig
index cbe6a890a93a0d1448f46e32edbfdc5231ba7098,be18b214c31efd7eb9de41547812991edde22a50..e1738f666417dceeaff78c08c16f7970f5728916
@@@ -23,7 -23,8 +23,7 @@@ config IOMMU_IO_PGTABL
  config IOMMU_IO_PGTABLE_LPAE
        bool "ARMv7/v8 Long Descriptor Format"
        select IOMMU_IO_PGTABLE
 -      # SWIOTLB guarantees a dma_to_phys() implementation
 -      depends on ARM || ARM64 || (COMPILE_TEST && SWIOTLB)
 +      depends on HAS_DMA && (ARM || ARM64 || COMPILE_TEST)
        help
          Enable support for the ARM long descriptor pagetable format.
          This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page
@@@ -134,6 -135,16 +134,16 @@@ config INTEL_IOMM
          and include PCI device scope covered by these DMA
          remapping devices.
  
+ config INTEL_IOMMU_SVM
+       bool "Support for Shared Virtual Memory with Intel IOMMU"
+       depends on INTEL_IOMMU && X86
+       select PCI_PASID
+       select MMU_NOTIFIER
+       help
+         Shared Virtual Memory (SVM) provides a facility for devices
+         to access DMA resources through process address space by
+         means of a Process Address Space ID (PASID).
+
  config INTEL_IOMMU_DEFAULT_ON
        def_bool y
        prompt "Enable Intel DMA Remapping Devices by default"
index d65cf42399e8e5aa0f856a1ab6869c041adc0c48,2973c09c9677133e81044ca8f39e66bc00396ca9..6a10d97f9f6d641c6f89c9a4be7cf10bec86b35c
@@@ -418,10 -418,13 +418,13 @@@ struct device_domain_info 
        struct list_head global; /* link to global list */
        u8 bus;                 /* PCI bus number */
        u8 devfn;               /* PCI devfn number */
-       struct {
-               u8 enabled:1;
-               u8 qdep;
-       } ats;                  /* ATS state */
+       u8 pasid_supported:3;
+       u8 pasid_enabled:1;
+       u8 pri_supported:1;
+       u8 pri_enabled:1;
+       u8 ats_supported:1;
+       u8 ats_enabled:1;
+       u8 ats_qdep;
        struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
        struct intel_iommu *iommu; /* IOMMU used by this device */
        struct dmar_domain *domain; /* pointer to domain */
@@@ -497,13 -500,37 +500,37 @@@ static int dmar_forcedac
  static int intel_iommu_strict;
  static int intel_iommu_superpage = 1;
  static int intel_iommu_ecs = 1;
+ static int intel_iommu_pasid28;
+ static int iommu_identity_mapping;
  
- /* We only actually use ECS when PASID support (on the new bit 40)
-  * is also advertised. Some early implementations — the ones with
-  * PASID support on bit 28 — have issues even when we *only* use
-  * extended root/context tables. */
+ #define IDENTMAP_ALL          1
+ #define IDENTMAP_GFX          2
+ #define IDENTMAP_AZALIA               4
+
+ /* Broadwell and Skylake have broken ECS support — normal so-called "second
+  * level" translation of DMA requests-without-PASID doesn't actually happen
+  * unless you also set the NESTE bit in an extended context-entry. Which of
+  * course means that SVM doesn't work because it's trying to do nested
+  * translation of the physical addresses it finds in the process page tables,
+  * through the IOVA->phys mapping found in the "second level" page tables.
+  *
+  * The VT-d specification was retroactively changed to alter the definition
+  * of the capability bits and pretend that Broadwell/Skylake never happened...
+  * but unfortunately the wrong bit was changed. It's ECS which is broken, but
+  * for some reason it was the PASID capability bit which was redefined (from
+  * bit 28 on BDW/SKL to bit 40 in future).
+  *
+  * So our test for ECS needs to eschew those implementations which set the old
+  * PASID capability bit 28, since those are the ones on which ECS is broken.
+  * Unless we are working around the 'pasid28' limitations, that is, by putting
+  * the device into passthrough mode for normal DMA and thus masking the bug.
+  */
  #define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
-                           ecap_pasid(iommu->ecap))
+                           (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
+ /* PASID support is thus enabled if ECS is enabled and *either* of the old
+  * or new capability bits are set. */
+ #define pasid_enabled(iommu) (ecs_enabled(iommu) &&                   \
+                             (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
  
  int intel_iommu_gfx_mapped;
  EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
@@@ -566,6 -593,11 +593,11 @@@ static int __init intel_iommu_setup(cha
                        printk(KERN_INFO
                                "Intel-IOMMU: disable extended context table support\n");
                        intel_iommu_ecs = 0;
+               } else if (!strncmp(str, "pasid28", 7)) {
+                       printk(KERN_INFO
+                               "Intel-IOMMU: enable pre-production PASID support\n");
+                       intel_iommu_pasid28 = 1;
+                       iommu_identity_mapping |= IDENTMAP_GFX;
                }
  
                str += strcspn(str, ",");
@@@ -1407,37 -1439,22 +1439,22 @@@ static struct device_domain_info 
  iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
                         u8 bus, u8 devfn)
  {
-       bool found = false;
        struct device_domain_info *info;
-       struct pci_dev *pdev;
  
        assert_spin_locked(&device_domain_lock);
  
-       if (!ecap_dev_iotlb_support(iommu->ecap))
-               return NULL;
        if (!iommu->qi)
                return NULL;
  
        list_for_each_entry(info, &domain->devices, link)
                if (info->iommu == iommu && info->bus == bus &&
                    info->devfn == devfn) {
-                       found = true;
+                       if (info->ats_supported && info->dev)
+                               return info;
                        break;
                }
  
-       if (!found || !info->dev || !dev_is_pci(info->dev))
-               return NULL;
-       pdev = to_pci_dev(info->dev);
-       if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
-               return NULL;
-       if (!dmar_find_matched_atsr_unit(pdev))
-               return NULL;
-       return info;
+       return NULL;
  }
  
  static void iommu_enable_dev_iotlb(struct device_domain_info *info)
  {
        struct pci_dev *pdev;

        if (!info || !dev_is_pci(info->dev))
                return;

        pdev = to_pci_dev(info->dev);
-       if (pci_enable_ats(pdev, VTD_PAGE_SHIFT))
-               return;
  
-       info->ats.enabled = 1;
-       info->ats.qdep = pci_ats_queue_depth(pdev);
+ #ifdef CONFIG_INTEL_IOMMU_SVM
+       /* The PCIe spec, in its wisdom, declares that the behaviour of
+          the device if you enable PASID support after ATS support is
+          undefined. So always enable PASID support on devices which
+          have it, even if we can't yet know if we're ever going to
+          use it. */
+       if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
+               info->pasid_enabled = 1;
+       if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
+               info->pri_enabled = 1;
+ #endif
+       if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
+               info->ats_enabled = 1;
+               info->ats_qdep = pci_ats_queue_depth(pdev);
+       }
  }
  
  static void iommu_disable_dev_iotlb(struct device_domain_info *info)
  {
-       if (!info->ats.enabled)
+       struct pci_dev *pdev;
+
+       if (!dev_is_pci(info->dev))
                return;
  
-       pci_disable_ats(to_pci_dev(info->dev));
-       info->ats.enabled = 0;
+       pdev = to_pci_dev(info->dev);
+       if (info->ats_enabled) {
+               pci_disable_ats(pdev);
+               info->ats_enabled = 0;
+       }
+ #ifdef CONFIG_INTEL_IOMMU_SVM
+       if (info->pri_enabled) {
+               pci_disable_pri(pdev);
+               info->pri_enabled = 0;
+       }
+       if (info->pasid_enabled) {
+               pci_disable_pasid(pdev);
+               info->pasid_enabled = 0;
+       }
+ #endif
  }
  
  static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
                                    u64 addr, unsigned mask)
  {
        u16 sid, qdep;
        unsigned long flags;
        struct device_domain_info *info;

        spin_lock_irqsave(&device_domain_lock, flags);
        list_for_each_entry(info, &domain->devices, link) {
-               if (!info->ats.enabled)
+               if (!info->ats_enabled)
                        continue;
  
                sid = info->bus << 8 | info->devfn;
-               qdep = info->ats.qdep;
+               qdep = info->ats_qdep;
                qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
        }
        spin_unlock_irqrestore(&device_domain_lock, flags);
@@@ -1667,6 -1712,14 +1712,14 @@@ static void free_dmar_iommu(struct inte
  
        /* free context mapping */
        free_context_table(iommu);
+
+ #ifdef CONFIG_INTEL_IOMMU_SVM
+       if (pasid_enabled(iommu)) {
+               if (ecap_prs(iommu->ecap))
+                       intel_svm_finish_prq(iommu);
+               intel_svm_free_pasid_tables(iommu);
+       }
+ #endif
  }
  
  static struct dmar_domain *alloc_domain(int flags)
@@@ -1934,8 -1987,10 +1987,10 @@@ static int domain_context_mapping_one(s
                }
  
                info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
-               translation = info ? CONTEXT_TT_DEV_IOTLB :
-                                    CONTEXT_TT_MULTI_LEVEL;
+               if (info && info->ats_supported)
+                       translation = CONTEXT_TT_DEV_IOTLB;
+               else
+                       translation = CONTEXT_TT_MULTI_LEVEL;
  
                context_set_address_root(context, virt_to_phys(pgd));
                context_set_address_width(context, iommu->agaw);
@@@ -2273,12 -2328,34 +2328,34 @@@ static struct dmar_domain *dmar_insert_
  
        info->bus = bus;
        info->devfn = devfn;
-       info->ats.enabled = 0;
-       info->ats.qdep = 0;
+       info->ats_supported = info->pasid_supported = info->pri_supported = 0;
+       info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
+       info->ats_qdep = 0;
        info->dev = dev;
        info->domain = domain;
        info->iommu = iommu;
  
+       if (dev && dev_is_pci(dev)) {
+               struct pci_dev *pdev = to_pci_dev(info->dev);
+
+               if (ecap_dev_iotlb_support(iommu->ecap) &&
+                   pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
+                   dmar_find_matched_atsr_unit(pdev))
+                       info->ats_supported = 1;
+
+               if (ecs_enabled(iommu)) {
+                       if (pasid_enabled(iommu)) {
+                               int features = pci_pasid_features(pdev);
+                               if (features >= 0)
+                                       info->pasid_supported = features | 1;
+                       }
+
+                       if (info->ats_supported && ecap_prs(iommu->ecap) &&
+                           pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
+                               info->pri_supported = 1;
+               }
+       }
+
        spin_lock_irqsave(&device_domain_lock, flags);
        if (dev)
                found = find_domain(dev);
  
        if (ret) {
                spin_unlock_irqrestore(&device_domain_lock, flags);
 +              free_devinfo_mem(info);
                return NULL;
        }
  
@@@ -2404,11 -2480,6 +2481,6 @@@ found_domain
        return domain;
  }
  
- static int iommu_identity_mapping;
-
- #define IDENTMAP_ALL          1
- #define IDENTMAP_GFX          2
- #define IDENTMAP_AZALIA               4
  static int iommu_domain_identity_map(struct dmar_domain *domain,
                                     unsigned long long start,
                                     unsigned long long end)
@@@ -3100,6 -3171,10 +3172,10 @@@ static int __init init_dmars(void
  
                if (!ecap_pass_through(iommu->ecap))
                        hw_pass_through = 0;
+ #ifdef CONFIG_INTEL_IOMMU_SVM
+               if (pasid_enabled(iommu))
+                       intel_svm_alloc_pasid_tables(iommu);
+ #endif
        }
  
        if (iommu_pass_through)
@@@ -3187,6 -3262,13 +3263,13 @@@ domains_done
  
                iommu_flush_write_buffer(iommu);
  
+ #ifdef CONFIG_INTEL_IOMMU_SVM
+               if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
+                       ret = intel_svm_enable_prq(iommu);
+                       if (ret)
+                               goto free_iommu;
+               }
+ #endif
                ret = dmar_set_interrupt(iommu);
                if (ret)
                        goto free_iommu;
@@@ -4115,6 -4197,11 +4198,11 @@@ static int intel_iommu_add(struct dmar_
        if (ret)
                goto out;
  
+ #ifdef CONFIG_INTEL_IOMMU_SVM
+       if (pasid_enabled(iommu))
+               intel_svm_alloc_pasid_tables(iommu);
+ #endif
+
        if (dmaru->ignored) {
                /*
                 * we always have to disable PMRs or DMA may fail on this device
  
        intel_iommu_init_qi(iommu);
        iommu_flush_write_buffer(iommu);
+ #ifdef CONFIG_INTEL_IOMMU_SVM
+       if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
+               ret = intel_svm_enable_prq(iommu);
+               if (ret)
+                       goto disable_iommu;
+       }
+ #endif
        ret = dmar_set_interrupt(iommu);
        if (ret)
                goto disable_iommu;
@@@ -4194,14 -4289,17 +4290,17 @@@ int dmar_find_matched_atsr_unit(struct 
        dev = pci_physfn(dev);
        for (bus = dev->bus; bus; bus = bus->parent) {
                bridge = bus->self;
-               if (!bridge || !pci_is_pcie(bridge) ||
+               /* If it's an integrated device, allow ATS */
+               if (!bridge)
+                       return 1;
+               /* Connected via non-PCIe: no ATS */
+               if (!pci_is_pcie(bridge) ||
                    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
                        return 0;
+               /* If we found the root port, look it up in the ATSR */
                if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
                        break;
        }
-       if (!bridge)
-               return 0;
  
        rcu_read_lock();
        list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
@@@ -4865,6 -4963,114 +4964,114 @@@ static void intel_iommu_remove_device(s
        iommu_device_unlink(iommu->iommu_dev, dev);
  }
  
+ #ifdef CONFIG_INTEL_IOMMU_SVM
+ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
+ {
+       struct device_domain_info *info;
+       struct context_entry *context;
+       struct dmar_domain *domain;
+       unsigned long flags;
+       u64 ctx_lo;
+       int ret;
+
+       domain = get_valid_domain_for_dev(sdev->dev);
+       if (!domain)
+               return -EINVAL;
+
+       spin_lock_irqsave(&device_domain_lock, flags);
+       spin_lock(&iommu->lock);
+
+       ret = -EINVAL;
+       info = sdev->dev->archdata.iommu;
+       if (!info || !info->pasid_supported)
+               goto out;
+
+       context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
+       if (WARN_ON(!context))
+               goto out;
+
+       ctx_lo = context[0].lo;
+
+       sdev->did = domain->iommu_did[iommu->seq_id];
+       sdev->sid = PCI_DEVID(info->bus, info->devfn);
+
+       if (!(ctx_lo & CONTEXT_PASIDE)) {
+               context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
+               context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap);
+               wmb();
+               /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
+                * extended to permit requests-with-PASID if the PASIDE bit
+                * is set. Which makes sense. For CONTEXT_TT_PASS_THROUGH,
+                * however, the PASIDE bit is ignored and requests-with-PASID
+                * are unconditionally blocked. Which makes less sense.
+                * So convert from CONTEXT_TT_PASS_THROUGH to one of the new
+                * "guest mode" translation types depending on whether ATS
+                * is available or not. Annoyingly, we can't use the new
+                * modes *unless* PASIDE is set. */
+               if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
+                       ctx_lo &= ~CONTEXT_TT_MASK;
+                       if (info->ats_supported)
+                               ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
+                       else
+                               ctx_lo |= CONTEXT_TT_PT_PASID << 2;
+               }
+               ctx_lo |= CONTEXT_PASIDE;
+               if (iommu->pasid_state_table)
+                       ctx_lo |= CONTEXT_DINVE;
+               if (info->pri_supported)
+                       ctx_lo |= CONTEXT_PRS;
+               context[0].lo = ctx_lo;
+               wmb();
+               iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
+                                          DMA_CCMD_MASK_NOBIT,
+                                          DMA_CCMD_DEVICE_INVL);
+       }
+
+       /* Enable PASID support in the device, if it wasn't already */
+       if (!info->pasid_enabled)
+               iommu_enable_dev_iotlb(info);
+
+       if (info->ats_enabled) {
+               sdev->dev_iotlb = 1;
+               sdev->qdep = info->ats_qdep;
+               if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
+                       sdev->qdep = 0;
+       }
+       ret = 0;
+
+  out:
+       spin_unlock(&iommu->lock);
+       spin_unlock_irqrestore(&device_domain_lock, flags);
+
+       return ret;
+ }
+
+ struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
+ {
+       struct intel_iommu *iommu;
+       u8 bus, devfn;
+
+       if (iommu_dummy(dev)) {
+               dev_warn(dev,
+                        "No IOMMU translation for device; cannot enable SVM\n");
+               return NULL;
+       }
+
+       iommu = device_to_iommu(dev, &bus, &devfn);
+       if (!iommu) {
+               dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
+               return NULL;
+       }
+
+       if (!iommu->pasid_table) {
+               dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
+               return NULL;
+       }
+
+       return iommu;
+ }
+
+ #endif /* CONFIG_INTEL_IOMMU_SVM */
  static const struct iommu_ops intel_iommu_ops = {
        .capable        = intel_iommu_capable,
        .domain_alloc   = intel_iommu_domain_alloc,