Merge branches 'arm/exynos', 'arm/renesas', 'arm/rockchip', 'arm/omap', 'arm/mediatek...
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 687f18f65cea58d2a5f22725a2c36c78621dd3cd..2be8e23448eeb8615596fecb4b39363522919d8a 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -458,31 +458,6 @@ static LIST_HEAD(dmar_rmrr_units);
 #define for_each_rmrr_units(rmrr) \
        list_for_each_entry(rmrr, &dmar_rmrr_units, list)
 
-static void flush_unmaps_timeout(unsigned long data);
-
-struct deferred_flush_entry {
-       unsigned long iova_pfn;
-       unsigned long nrpages;
-       struct dmar_domain *domain;
-       struct page *freelist;
-};
-
-#define HIGH_WATER_MARK 250
-struct deferred_flush_table {
-       int next;
-       struct deferred_flush_entry entries[HIGH_WATER_MARK];
-};
-
-struct deferred_flush_data {
-       spinlock_t lock;
-       int timer_on;
-       struct timer_list timer;
-       long size;
-       struct deferred_flush_table *tables;
-};
-
-static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);
-
 /* bitmap for indexing intel_iommus */
 static int g_num_of_iommus;
 
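The per-CPU deferred-flush machinery deleted above is superseded by the generic IOVA flush queue in drivers/iommu/iova.c, which the rest of this patch wires up via init_iova_flush_queue() and queue_iova(). Roughly equivalent state now lives inside struct iova_domain; the sketch below is an abridged approximation from memory rather than an excerpt, so treat the field names as assumptions.

/* Approximate shape of the per-entry state on the generic side (assumed): */
struct iova_fq_entry {
	unsigned long iova_pfn;   /* was deferred_flush_entry.iova_pfn */
	unsigned long pages;      /* was deferred_flush_entry.nrpages  */
	unsigned long data;       /* opaque cookie; the freelist here  */
	u64           counter;    /* flush sequence number when queued */
};
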
@@ -974,20 +949,6 @@ static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
        return ret;
 }
 
-static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
-{
-       struct context_entry *context;
-       unsigned long flags;
-
-       spin_lock_irqsave(&iommu->lock, flags);
-       context = iommu_context_addr(iommu, bus, devfn, 0);
-       if (context) {
-               context_clear_entry(context);
-               __iommu_flush_cache(iommu, context, sizeof(*context));
-       }
-       spin_unlock_irqrestore(&iommu->lock, flags);
-}
-
 static void free_context_table(struct intel_iommu *iommu)
 {
        int i;
@@ -1137,8 +1098,9 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
 }
 
 static void dma_pte_free_level(struct dmar_domain *domain, int level,
-                              struct dma_pte *pte, unsigned long pfn,
-                              unsigned long start_pfn, unsigned long last_pfn)
+                              int retain_level, struct dma_pte *pte,
+                              unsigned long pfn, unsigned long start_pfn,
+                              unsigned long last_pfn)
 {
        pfn = max(start_pfn, pfn);
        pte = &pte[pfn_level_offset(pfn, level)];
@@ -1153,12 +1115,17 @@ static void dma_pte_free_level(struct dmar_domain *domain, int level,
                level_pfn = pfn & level_mask(level);
                level_pte = phys_to_virt(dma_pte_addr(pte));
 
-               if (level > 2)
-                       dma_pte_free_level(domain, level - 1, level_pte,
-                                          level_pfn, start_pfn, last_pfn);
+               if (level > 2) {
+                       dma_pte_free_level(domain, level - 1, retain_level,
+                                          level_pte, level_pfn, start_pfn,
+                                          last_pfn);
+               }
 
-               /* If range covers entire pagetable, free it */
-               if (!(start_pfn > level_pfn ||
+               /*
+                * Free the page table if we're below the level we want to
+                * retain and the range covers the entire table.
+                */
+               if (level < retain_level && !(start_pfn > level_pfn ||
                      last_pfn < level_pfn + level_size(level) - 1)) {
                        dma_clear_pte(pte);
                        domain_flush_cache(domain, pte, sizeof(*pte));
@@ -1169,10 +1136,14 @@ next:
        } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
 }
 
-/* clear last level (leaf) ptes and free page table pages. */
+/*
+ * clear last level (leaf) ptes and free page table pages below the
+ * level we wish to keep intact.
+ */
 static void dma_pte_free_pagetable(struct dmar_domain *domain,
                                   unsigned long start_pfn,
-                                  unsigned long last_pfn)
+                                  unsigned long last_pfn,
+                                  int retain_level)
 {
        BUG_ON(!domain_pfn_supported(domain, start_pfn));
        BUG_ON(!domain_pfn_supported(domain, last_pfn));
@@ -1181,7 +1152,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
        dma_pte_clear_range(domain, start_pfn, last_pfn);
 
        /* We don't need lock here; nobody else touches the iova range */
-       dma_pte_free_level(domain, agaw_to_level(domain->agaw),
+       dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
                           domain->pgd, 0, start_pfn, last_pfn);
 
        /* free pgd */
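With the new retain_level argument in place, the two call patterns used later in this patch look as follows (lifted from the __domain_mapping() and intel_map_sg() hunks further down, repeated here only to illustrate the semantics):

/* Full teardown: free every page table below the pgd (old behaviour). */
dma_pte_free_pagetable(domain, start_vpfn, start_vpfn + size - 1,
		       agaw_to_level(domain->agaw) + 1);

/* Superpage case: clear the old small-page tables underneath the new
 * large PTE, but keep the table at largepage_lvl that will hold it. */
dma_pte_free_pagetable(domain, iov_pfn, end_pfn, largepage_lvl + 1);
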
@@ -1309,6 +1280,13 @@ static void dma_free_pagelist(struct page *freelist)
        }
 }
 
+static void iova_entry_free(unsigned long data)
+{
+       struct page *freelist = (struct page *)data;
+
+       dma_free_pagelist(freelist);
+}
+
 /* iommu handling */
 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
 {
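iova_entry_free() is the destructor half of the flush-queue pairing: the non-strict unmap path hands the page-table freelist to queue_iova() as an opaque unsigned long, and the queue passes the same value back here once the IOTLB flush has been done. A minimal sketch of that round trip, using only calls added by this patch:

/* Producer side (see intel_unmap() further down): */
queue_iova(&domain->iovad, iova_pfn, nrpages, (unsigned long)freelist);

/* Once the queued range has been flushed, the core is expected to call
 * iova_entry_free((unsigned long)freelist) and then release the IOVA
 * range itself. */
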
@@ -1622,6 +1600,25 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
                                      addr, mask);
 }
 
+static void iommu_flush_iova(struct iova_domain *iovad)
+{
+       struct dmar_domain *domain;
+       int idx;
+
+       domain = container_of(iovad, struct dmar_domain, iovad);
+
+       for_each_domain_iommu(idx, domain) {
+               struct intel_iommu *iommu = g_iommus[idx];
+               u16 did = domain->iommu_did[iommu->seq_id];
+
+               iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
+
+               if (!cap_caching_mode(iommu->cap))
+                       iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
+                                             0, MAX_AGAW_PFN_WIDTH);
+       }
+}
+
 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
 {
        u32 pmen;
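iommu_flush_iova() is the flush-callback half of the same pairing: one domain-selective (DSI) IOTLB flush per IOMMU covers every range queued for that domain, which is what makes the batching cheap. The drain sequence below is assumed from the generic flush-queue code and is not part of this diff; the names on the core side are approximate.

/*
 * Assumed drain order when a per-CPU queue fills up or its timer fires:
 *
 *	iovad->flush_cb(iovad);                 -> iommu_flush_iova()
 *	for each queued entry:
 *		iovad->entry_dtor(entry.data);  -> iova_entry_free()
 *		free_iova_fast(iovad, entry.iova_pfn, entry.pages);
 */
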
@@ -1932,9 +1929,16 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
 {
        int adjust_width, agaw;
        unsigned long sagaw;
+       int err;
 
        init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
                        DMA_32BIT_PFN);
+
+       err = init_iova_flush_queue(&domain->iovad,
+                                   iommu_flush_iova, iova_entry_free);
+       if (err)
+               return err;
+
        domain_reserve_special_ranges(domain);
 
        /* calculate AGAW */
@@ -1986,14 +1990,6 @@ static void domain_exit(struct dmar_domain *domain)
        if (!domain)
                return;
 
-       /* Flush any lazy unmaps that may reference this domain */
-       if (!intel_iommu_strict) {
-               int cpu;
-
-               for_each_possible_cpu(cpu)
-                       flush_unmaps_timeout(cpu);
-       }
-
        /* Remove associated devices and clear attached or cached domains */
        rcu_read_lock();
        domain_remove_dev_info(domain);
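The explicit per-CPU flush on domain teardown is no longer needed because the flush queue is torn down together with the IOVA domain. The call below already exists later in domain_exit() (outside this hunk); the comment describes behaviour assumed from the generic iova code.

put_iova_domain(&domain->iovad);	/* assumed to destroy the flush queue,
					 * running iova_entry_free() for any
					 * entries still pending */
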
@@ -2277,8 +2273,11 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
                                /*
                                 * Ensure that old small page tables are
                                 * removed to make room for superpage(s).
+                                * We're adding new large pages, so make sure
+                                * we don't remove their parent tables.
                                 */
-                               dma_pte_free_pagetable(domain, iov_pfn, end_pfn);
+                               dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
+                                                      largepage_lvl + 1);
                        } else {
                                pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
                        }
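For reference, the level numbering behind largepage_lvl + 1, assuming the usual 9-bit stride per level and 4KiB base pages used by this driver:

/* level 1: 4KiB leaf PTEs
 * level 2: 2MiB superpage   (level_size(2) = 512   4KiB pages)
 * level 3: 1GiB superpage   (level_size(3) = 512^2 4KiB pages)
 *
 * For a 2MiB mapping, retain_level is therefore 3: the old level-1
 * tables under the range are freed, while the level-2 table that will
 * receive the large PTE (and everything above it) stays in place.
 */
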
@@ -2351,13 +2350,33 @@ static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long i
 
 static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
 {
+       unsigned long flags;
+       struct context_entry *context;
+       u16 did_old;
+
        if (!iommu)
                return;
 
-       clear_context_table(iommu, bus, devfn);
-       iommu->flush.flush_context(iommu, 0, 0, 0,
-                                          DMA_CCMD_GLOBAL_INVL);
-       iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
+       spin_lock_irqsave(&iommu->lock, flags);
+       context = iommu_context_addr(iommu, bus, devfn, 0);
+       if (!context) {
+               spin_unlock_irqrestore(&iommu->lock, flags);
+               return;
+       }
+       did_old = context_domain_id(context);
+       context_clear_entry(context);
+       __iommu_flush_cache(iommu, context, sizeof(*context));
+       spin_unlock_irqrestore(&iommu->lock, flags);
+       iommu->flush.flush_context(iommu,
+                                  did_old,
+                                  (((u16)bus) << 8) | devfn,
+                                  DMA_CCMD_MASK_NOBIT,
+                                  DMA_CCMD_DEVICE_INVL);
+       iommu->flush.flush_iotlb(iommu,
+                                did_old,
+                                0,
+                                0,
+                                DMA_TLB_DSI_FLUSH);
 }
 
 static inline void unlink_domain_info(struct device_domain_info *info)
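Instead of flushing the context-cache and IOTLB globally, the teardown is now scoped to the device (DMA_CCMD_DEVICE_INVL) and to the old domain id (DMA_TLB_DSI_FLUSH). The 16-bit source-id passed to the context-cache invalidation is just the PCI requester ID; the open-coded shift above matches the PCI_DEVID() helper this file already uses elsewhere (e.g. for sdev->sid):

u16 sid = PCI_DEVID(bus, devfn);	/* == (((u16)bus) << 8) | devfn */
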
@@ -3206,7 +3225,7 @@ static int __init init_dmars(void)
        bool copied_tables = false;
        struct device *dev;
        struct intel_iommu *iommu;
-       int i, ret, cpu;
+       int i, ret;
 
        /*
         * for each drhd
@@ -3239,22 +3258,6 @@ static int __init init_dmars(void)
                goto error;
        }
 
-       for_each_possible_cpu(cpu) {
-               struct deferred_flush_data *dfd = per_cpu_ptr(&deferred_flush,
-                                                             cpu);
-
-               dfd->tables = kzalloc(g_num_of_iommus *
-                                     sizeof(struct deferred_flush_table),
-                                     GFP_KERNEL);
-               if (!dfd->tables) {
-                       ret = -ENOMEM;
-                       goto free_g_iommus;
-               }
-
-               spin_lock_init(&dfd->lock);
-               setup_timer(&dfd->timer, flush_unmaps_timeout, cpu);
-       }
-
        for_each_active_iommu(iommu, drhd) {
                g_iommus[iommu->seq_id] = iommu;
 
@@ -3437,10 +3440,9 @@ free_iommu:
                disable_dmar_iommu(iommu);
                free_dmar_iommu(iommu);
        }
-free_g_iommus:
-       for_each_possible_cpu(cpu)
-               kfree(per_cpu_ptr(&deferred_flush, cpu)->tables);
+
        kfree(g_iommus);
+
 error:
        return ret;
 }
@@ -3645,110 +3647,6 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page,
                                  dir, *dev->dma_mask);
 }
 
-static void flush_unmaps(struct deferred_flush_data *flush_data)
-{
-       int i, j;
-
-       flush_data->timer_on = 0;
-
-       /* just flush them all */
-       for (i = 0; i < g_num_of_iommus; i++) {
-               struct intel_iommu *iommu = g_iommus[i];
-               struct deferred_flush_table *flush_table =
-                               &flush_data->tables[i];
-               if (!iommu)
-                       continue;
-
-               if (!flush_table->next)
-                       continue;
-
-               /* In caching mode, global flushes turn emulation expensive */
-               if (!cap_caching_mode(iommu->cap))
-                       iommu->flush.flush_iotlb(iommu, 0, 0, 0,
-                                        DMA_TLB_GLOBAL_FLUSH);
-               for (j = 0; j < flush_table->next; j++) {
-                       unsigned long mask;
-                       struct deferred_flush_entry *entry =
-                                               &flush_table->entries[j];
-                       unsigned long iova_pfn = entry->iova_pfn;
-                       unsigned long nrpages = entry->nrpages;
-                       struct dmar_domain *domain = entry->domain;
-                       struct page *freelist = entry->freelist;
-
-                       /* On real hardware multiple invalidations are expensive */
-                       if (cap_caching_mode(iommu->cap))
-                               iommu_flush_iotlb_psi(iommu, domain,
-                                       mm_to_dma_pfn(iova_pfn),
-                                       nrpages, !freelist, 0);
-                       else {
-                               mask = ilog2(nrpages);
-                               iommu_flush_dev_iotlb(domain,
-                                               (uint64_t)iova_pfn << PAGE_SHIFT, mask);
-                       }
-                       free_iova_fast(&domain->iovad, iova_pfn, nrpages);
-                       if (freelist)
-                               dma_free_pagelist(freelist);
-               }
-               flush_table->next = 0;
-       }
-
-       flush_data->size = 0;
-}
-
-static void flush_unmaps_timeout(unsigned long cpuid)
-{
-       struct deferred_flush_data *flush_data = per_cpu_ptr(&deferred_flush, cpuid);
-       unsigned long flags;
-
-       spin_lock_irqsave(&flush_data->lock, flags);
-       flush_unmaps(flush_data);
-       spin_unlock_irqrestore(&flush_data->lock, flags);
-}
-
-static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
-                     unsigned long nrpages, struct page *freelist)
-{
-       unsigned long flags;
-       int entry_id, iommu_id;
-       struct intel_iommu *iommu;
-       struct deferred_flush_entry *entry;
-       struct deferred_flush_data *flush_data;
-
-       flush_data = raw_cpu_ptr(&deferred_flush);
-
-       /* Flush all CPUs' entries to avoid deferring too much.  If
-        * this becomes a bottleneck, can just flush us, and rely on
-        * flush timer for the rest.
-        */
-       if (flush_data->size == HIGH_WATER_MARK) {
-               int cpu;
-
-               for_each_online_cpu(cpu)
-                       flush_unmaps_timeout(cpu);
-       }
-
-       spin_lock_irqsave(&flush_data->lock, flags);
-
-       iommu = domain_get_iommu(dom);
-       iommu_id = iommu->seq_id;
-
-       entry_id = flush_data->tables[iommu_id].next;
-       ++(flush_data->tables[iommu_id].next);
-
-       entry = &flush_data->tables[iommu_id].entries[entry_id];
-       entry->domain = dom;
-       entry->iova_pfn = iova_pfn;
-       entry->nrpages = nrpages;
-       entry->freelist = freelist;
-
-       if (!flush_data->timer_on) {
-               mod_timer(&flush_data->timer, jiffies + msecs_to_jiffies(10));
-               flush_data->timer_on = 1;
-       }
-       flush_data->size++;
-       spin_unlock_irqrestore(&flush_data->lock, flags);
-}
-
 static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
 {
        struct dmar_domain *domain;
@@ -3784,7 +3682,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
                free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
                dma_free_pagelist(freelist);
        } else {
-               add_unmap(domain, iova_pfn, nrpages, freelist);
+               queue_iova(&domain->iovad, iova_pfn, nrpages,
+                          (unsigned long)freelist);
                /*
                 * queue up the release of the unmap to save the 1/6th of the
                 * cpu used up by the iotlb flush operation...
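With add_unmap() gone, intel_unmap() splits into an immediate branch (what intel_iommu=strict selects) and a deferred branch. A condensed sketch of the resulting logic, simplified from the surrounding context rather than quoted verbatim:

if (intel_iommu_strict) {
	/* flush now, then release the IOVA and the page-table pages */
	iommu_flush_iotlb_psi(iommu, domain, mm_to_dma_pfn(iova_pfn),
			      nrpages, !freelist, 0);
	free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
	dma_free_pagelist(freelist);
} else {
	/* defer all three steps to the flush queue */
	queue_iova(&domain->iovad, iova_pfn, nrpages,
		   (unsigned long)freelist);
}
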
@@ -3938,7 +3837,8 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
        ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
        if (unlikely(ret)) {
                dma_pte_free_pagetable(domain, start_vpfn,
-                                      start_vpfn + size - 1);
+                                      start_vpfn + size - 1,
+                                      agaw_to_level(domain->agaw) + 1);
                free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
                return 0;
        }
@@ -4721,7 +4621,6 @@ static void free_all_cpu_cached_iovas(unsigned int cpu)
 static int intel_iommu_cpu_dead(unsigned int cpu)
 {
        free_all_cpu_cached_iovas(cpu);
-       flush_unmaps_timeout(cpu);
        return 0;
 }
 
@@ -4736,7 +4635,9 @@ static void intel_disable_iommus(void)
 
 static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
 {
-       return container_of(dev, struct intel_iommu, iommu.dev);
+       struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
+
+       return container_of(iommu_dev, struct intel_iommu, iommu);
 }
 
 static ssize_t intel_iommu_show_version(struct device *dev,
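The old container_of() assumed the sysfs struct device was embedded directly in struct intel_iommu's iommu member. iommu_device_sysfs_add() actually registers a separate device and stores the struct iommu_device in its drvdata, which is what dev_to_iommu_device() looks up (essentially dev_get_drvdata(), as far as I recall), so container_of() must start from the iommu_device instead. Typical callers are the sysfs show routines in this area, along the lines of:

static ssize_t intel_iommu_show_cap(struct device *dev,
				    struct device_attribute *attr,
				    char *buf)
{
	struct intel_iommu *iommu = dev_to_intel_iommu(dev);

	return sprintf(buf, "%llx\n", iommu->cap);
}
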
@@ -5341,7 +5242,8 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd
        sdev->sid = PCI_DEVID(info->bus, info->devfn);
 
        if (!(ctx_lo & CONTEXT_PASIDE)) {
-               context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
+               if (iommu->pasid_state_table)
+                       context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
                context[1].lo = (u64)virt_to_phys(iommu->pasid_table) |
                        intel_iommu_get_pts(iommu);