Merge tag 'iommu-fixes-5.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git...
[sfrench/cifs-2.6.git] / drivers / iommu / amd_iommu.c
index 97975bb7f34735311cf606446f68a02b6b59b7ae..2369b8af81f3b7e4d829a75ef979fb1858f0682f 100644 (file)
@@ -70,7 +70,6 @@
  */
 #define AMD_IOMMU_PGSIZES      ((~0xFFFUL) & ~(2ULL << 38))
 
-static DEFINE_SPINLOCK(amd_iommu_devtable_lock);
 static DEFINE_SPINLOCK(pd_bitmap_lock);
 
 /* List of all available dev_data structures */
@@ -202,6 +201,7 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid)
        if (!dev_data)
                return NULL;
 
+       spin_lock_init(&dev_data->lock);
        dev_data->devid = devid;
        ratelimit_default_init(&dev_data->rs);
 
@@ -501,6 +501,29 @@ static void iommu_uninit_device(struct device *dev)
         */
 }
 
+/*
+ * Helper function to get the first pte of a large mapping
+ */
+static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
+                        unsigned long *count)
+{
+       unsigned long pte_mask, pg_size, cnt;
+       u64 *fpte;
+
+       pg_size  = PTE_PAGE_SIZE(*pte);
+       cnt      = PAGE_SIZE_PTE_COUNT(pg_size);
+       pte_mask = ~((cnt << 3) - 1);
+       fpte     = (u64 *)(((unsigned long)pte) & pte_mask);
+
+       if (page_size)
+               *page_size = pg_size;
+
+       if (count)
+               *count = cnt;
+
+       return fpte;
+}
+
 /****************************************************************************
  *
  * Interrupt handling functions
@@ -1311,8 +1334,12 @@ static void domain_flush_np_cache(struct protection_domain *domain,
                dma_addr_t iova, size_t size)
 {
        if (unlikely(amd_iommu_np_cache)) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&domain->lock, flags);
                domain_flush_pages(domain, iova, size);
                domain_flush_complete(domain);
+               spin_unlock_irqrestore(&domain->lock, flags);
        }
 }
 
@@ -1425,7 +1452,7 @@ static void free_pagetable(struct protection_domain *domain)
        BUG_ON(domain->mode < PAGE_MODE_NONE ||
               domain->mode > PAGE_MODE_6_LEVEL);
 
-       free_sub_pt(root, domain->mode, freelist);
+       freelist = free_sub_pt(root, domain->mode, freelist);
 
        free_page_list(freelist);
 }
@@ -1435,10 +1462,11 @@ static void free_pagetable(struct protection_domain *domain)
  * another level increases the size of the address space by 9 bits to a size up
  * to 64 bits.
  */
-static void increase_address_space(struct protection_domain *domain,
+static bool increase_address_space(struct protection_domain *domain,
                                   gfp_t gfp)
 {
        unsigned long flags;
+       bool ret = false;
        u64 *pte;
 
        spin_lock_irqsave(&domain->lock, flags);
@@ -1455,19 +1483,21 @@ static void increase_address_space(struct protection_domain *domain,
                                        iommu_virt_to_phys(domain->pt_root));
        domain->pt_root  = pte;
        domain->mode    += 1;
-       domain->updated  = true;
+
+       ret = true;
 
 out:
        spin_unlock_irqrestore(&domain->lock, flags);
 
-       return;
+       return ret;
 }
 
 static u64 *alloc_pte(struct protection_domain *domain,
                      unsigned long address,
                      unsigned long page_size,
                      u64 **pte_page,
-                     gfp_t gfp)
+                     gfp_t gfp,
+                     bool *updated)
 {
        int level, end_lvl;
        u64 *pte, *page;
@@ -1475,7 +1505,7 @@ static u64 *alloc_pte(struct protection_domain *domain,
        BUG_ON(!is_power_of_2(page_size));
 
        while (address > PM_LEVEL_SIZE(domain->mode))
-               increase_address_space(domain, gfp);
+               *updated = increase_address_space(domain, gfp) || *updated;
 
        level   = domain->mode - 1;
        pte     = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
@@ -1489,9 +1519,32 @@ static u64 *alloc_pte(struct protection_domain *domain,
                __pte     = *pte;
                pte_level = PM_PTE_LEVEL(__pte);
 
-               if (!IOMMU_PTE_PRESENT(__pte) ||
+               /*
+                * If we replace a series of large PTEs, we need
+                * to tear down all of them.
+                */
+               if (IOMMU_PTE_PRESENT(__pte) &&
                    pte_level == PAGE_MODE_7_LEVEL) {
+                       unsigned long count, i;
+                       u64 *lpte;
+
+                       lpte = first_pte_l7(pte, NULL, &count);
+
+                       /*
+                        * Unmap the replicated PTEs that still match the
+                        * original large mapping
+                        */
+                       for (i = 0; i < count; ++i)
+                               cmpxchg64(&lpte[i], __pte, 0ULL);
+
+                       *updated = true;
+                       continue;
+               }
+
+               if (!IOMMU_PTE_PRESENT(__pte) ||
+                   pte_level == PAGE_MODE_NONE) {
                        page = (u64 *)get_zeroed_page(gfp);
+
                        if (!page)
                                return NULL;
 
@@ -1500,8 +1553,8 @@ static u64 *alloc_pte(struct protection_domain *domain,
                        /* pte could have been changed somewhere. */
                        if (cmpxchg64(pte, __pte, __npte) != __pte)
                                free_page((unsigned long)page);
-                       else if (pte_level == PAGE_MODE_7_LEVEL)
-                               domain->updated = true;
+                       else if (IOMMU_PTE_PRESENT(__pte))
+                               *updated = true;
 
                        continue;
                }
@@ -1566,17 +1619,12 @@ static u64 *fetch_pte(struct protection_domain *domain,
                *page_size = PTE_LEVEL_PAGE_SIZE(level);
        }
 
-       if (PM_PTE_LEVEL(*pte) == 0x07) {
-               unsigned long pte_mask;
-
-               /*
-                * If we have a series of large PTEs, make
-                * sure to return a pointer to the first one.
-                */
-               *page_size = pte_mask = PTE_PAGE_SIZE(*pte);
-               pte_mask   = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1);
-               pte        = (u64 *)(((unsigned long)pte) & pte_mask);
-       }
+       /*
+        * If we have a series of large PTEs, make
+        * sure to return a pointer to the first one.
+        */
+       if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
+               pte = first_pte_l7(pte, page_size, NULL);
 
        return pte;
 }
@@ -1615,26 +1663,29 @@ static int iommu_map_page(struct protection_domain *dom,
                          gfp_t gfp)
 {
        struct page *freelist = NULL;
+       bool updated = false;
        u64 __pte, *pte;
-       int i, count;
+       int ret, i, count;
 
        BUG_ON(!IS_ALIGNED(bus_addr, page_size));
        BUG_ON(!IS_ALIGNED(phys_addr, page_size));
 
+       ret = -EINVAL;
        if (!(prot & IOMMU_PROT_MASK))
-               return -EINVAL;
+               goto out;
 
        count = PAGE_SIZE_PTE_COUNT(page_size);
-       pte   = alloc_pte(dom, bus_addr, page_size, NULL, gfp);
+       pte   = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated);
 
+       ret = -ENOMEM;
        if (!pte)
-               return -ENOMEM;
+               goto out;
 
        for (i = 0; i < count; ++i)
                freelist = free_clear_pte(&pte[i], pte[i], freelist);
 
        if (freelist != NULL)
-               dom->updated = true;
+               updated = true;
 
        if (count > 1) {
                __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size);
@@ -1650,12 +1701,21 @@ static int iommu_map_page(struct protection_domain *dom,
        for (i = 0; i < count; ++i)
                pte[i] = __pte;
 
-       update_domain(dom);
+       ret = 0;
+
+out:
+       if (updated) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&dom->lock, flags);
+               update_domain(dom);
+               spin_unlock_irqrestore(&dom->lock, flags);
+       }
 
        /* Everything flushed out, free pages now */
        free_page_list(freelist);
 
-       return 0;
+       return ret;
 }
 
 static unsigned long iommu_unmap_page(struct protection_domain *dom,
@@ -1806,8 +1866,12 @@ static void free_gcr3_table(struct protection_domain *domain)
 
 static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom)
 {
+       unsigned long flags;
+
+       spin_lock_irqsave(&dom->domain.lock, flags);
        domain_flush_tlb(&dom->domain);
        domain_flush_complete(&dom->domain);
+       spin_unlock_irqrestore(&dom->domain.lock, flags);
 }
 
 static void iova_domain_flush_tlb(struct iova_domain *iovad)
@@ -2022,36 +2086,6 @@ static void do_detach(struct iommu_dev_data *dev_data)
        domain->dev_cnt                 -= 1;
 }
 
-/*
- * If a device is not yet associated with a domain, this function makes the
- * device visible in the domain
- */
-static int __attach_device(struct iommu_dev_data *dev_data,
-                          struct protection_domain *domain)
-{
-       int ret;
-
-       /* lock domain */
-       spin_lock(&domain->lock);
-
-       ret = -EBUSY;
-       if (dev_data->domain != NULL)
-               goto out_unlock;
-
-       /* Attach alias group root */
-       do_attach(dev_data, domain);
-
-       ret = 0;
-
-out_unlock:
-
-       /* ready */
-       spin_unlock(&domain->lock);
-
-       return ret;
-}
-
-
 static void pdev_iommuv2_disable(struct pci_dev *pdev)
 {
        pci_disable_ats(pdev);
@@ -2133,19 +2167,28 @@ static int attach_device(struct device *dev,
        unsigned long flags;
        int ret;
 
+       spin_lock_irqsave(&domain->lock, flags);
+
        dev_data = get_dev_data(dev);
 
+       spin_lock(&dev_data->lock);
+
+       ret = -EBUSY;
+       if (dev_data->domain != NULL)
+               goto out;
+
        if (!dev_is_pci(dev))
                goto skip_ats_check;
 
        pdev = to_pci_dev(dev);
        if (domain->flags & PD_IOMMUV2_MASK) {
+               ret = -EINVAL;
                if (!dev_data->passthrough)
-                       return -EINVAL;
+                       goto out;
 
                if (dev_data->iommu_v2) {
                        if (pdev_iommuv2_enable(pdev) != 0)
-                               return -EINVAL;
+                               goto out;
 
                        dev_data->ats.enabled = true;
                        dev_data->ats.qdep    = pci_ats_queue_depth(pdev);
@@ -2158,9 +2201,9 @@ static int attach_device(struct device *dev,
        }
 
 skip_ats_check:
-       spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
-       ret = __attach_device(dev_data, domain);
-       spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+       ret = 0;
+
+       do_attach(dev_data, domain);
 
        /*
         * We might boot into a crash-kernel here. The crashed kernel
@@ -2169,23 +2212,14 @@ skip_ats_check:
         */
        domain_flush_tlb_pde(domain);
 
-       return ret;
-}
-
-/*
- * Removes a device from a protection domain (unlocked)
- */
-static void __detach_device(struct iommu_dev_data *dev_data)
-{
-       struct protection_domain *domain;
-
-       domain = dev_data->domain;
+       domain_flush_complete(domain);
 
-       spin_lock(&domain->lock);
+out:
+       spin_unlock(&dev_data->lock);
 
-       do_detach(dev_data);
+       spin_unlock_irqrestore(&domain->lock, flags);
 
-       spin_unlock(&domain->lock);
+       return ret;
 }
 
 /*
@@ -2200,6 +2234,10 @@ static void detach_device(struct device *dev)
        dev_data = get_dev_data(dev);
        domain   = dev_data->domain;
 
+       spin_lock_irqsave(&domain->lock, flags);
+
+       spin_lock(&dev_data->lock);
+
        /*
         * First check if the device is still attached. It might already
         * be detached from its domain because the generic
@@ -2207,15 +2245,12 @@ static void detach_device(struct device *dev)
         * our alias handling.
         */
        if (WARN_ON(!dev_data->domain))
-               return;
+               goto out;
 
-       /* lock device table */
-       spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
-       __detach_device(dev_data);
-       spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+       do_detach(dev_data);
 
        if (!dev_is_pci(dev))
-               return;
+               goto out;
 
        if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2)
                pdev_iommuv2_disable(to_pci_dev(dev));
@@ -2223,6 +2258,11 @@ static void detach_device(struct device *dev)
                pci_disable_ats(to_pci_dev(dev));
 
        dev_data->ats.enabled = false;
+
+out:
+       spin_unlock(&dev_data->lock);
+
+       spin_unlock_irqrestore(&domain->lock, flags);
 }
 
 static int amd_iommu_add_device(struct device *dev)
@@ -2354,15 +2394,10 @@ static void update_device_table(struct protection_domain *domain)
 
 static void update_domain(struct protection_domain *domain)
 {
-       if (!domain->updated)
-               return;
-
        update_device_table(domain);
 
        domain_flush_devices(domain);
        domain_flush_tlb_pde(domain);
-
-       domain->updated = false;
 }
 
 static int dir2prot(enum dma_data_direction direction)
@@ -2392,6 +2427,7 @@ static dma_addr_t __map_single(struct device *dev,
 {
        dma_addr_t offset = paddr & ~PAGE_MASK;
        dma_addr_t address, start, ret;
+       unsigned long flags;
        unsigned int pages;
        int prot = 0;
        int i;
@@ -2429,8 +2465,10 @@ out_unmap:
                iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE);
        }
 
+       spin_lock_irqsave(&dma_dom->domain.lock, flags);
        domain_flush_tlb(&dma_dom->domain);
        domain_flush_complete(&dma_dom->domain);
+       spin_unlock_irqrestore(&dma_dom->domain.lock, flags);
 
        dma_ops_free_iova(dma_dom, address, pages);
 
@@ -2459,8 +2497,12 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
        }
 
        if (amd_iommu_unmap_flush) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&dma_dom->domain.lock, flags);
                domain_flush_tlb(&dma_dom->domain);
                domain_flush_complete(&dma_dom->domain);
+               spin_unlock_irqrestore(&dma_dom->domain.lock, flags);
                dma_ops_free_iova(dma_dom, dma_addr, pages);
        } else {
                pages = __roundup_pow_of_two(pages);
@@ -2866,16 +2908,16 @@ static void cleanup_domain(struct protection_domain *domain)
        struct iommu_dev_data *entry;
        unsigned long flags;
 
-       spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
+       spin_lock_irqsave(&domain->lock, flags);
 
        while (!list_empty(&domain->dev_list)) {
                entry = list_first_entry(&domain->dev_list,
                                         struct iommu_dev_data, list);
                BUG_ON(!entry->domain);
-               __detach_device(entry);
+               do_detach(entry);
        }
 
-       spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+       spin_unlock_irqrestore(&domain->lock, flags);
 }
 
 static void protection_domain_free(struct protection_domain *domain)
@@ -3226,9 +3268,12 @@ static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
 static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
 {
        struct protection_domain *dom = to_pdomain(domain);
+       unsigned long flags;
 
+       spin_lock_irqsave(&dom->lock, flags);
        domain_flush_tlb_pde(dom);
        domain_flush_complete(dom);
+       spin_unlock_irqrestore(&dom->lock, flags);
 }
 
 static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
@@ -3290,7 +3335,6 @@ void amd_iommu_domain_direct_map(struct iommu_domain *dom)
 
        /* Update data structure */
        domain->mode    = PAGE_MODE_NONE;
-       domain->updated = true;
 
        /* Make changes visible to IOMMUs */
        update_domain(domain);
@@ -3336,7 +3380,6 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
 
        domain->glx      = levels;
        domain->flags   |= PD_IOMMUV2_MASK;
-       domain->updated  = true;
 
        update_domain(domain);