drm/tegra: Reuse IOVA mapping where possible
authorThierry Reding <treding@nvidia.com>
Tue, 4 Feb 2020 13:59:25 +0000 (14:59 +0100)
committerThierry Reding <treding@nvidia.com>
Thu, 6 Feb 2020 17:21:55 +0000 (18:21 +0100)
This partially reverts the DMA API support that was recently merged
because it was causing performance regressions on older Tegra devices.
Unfortunately, the cache maintenance performed by dma_map_sg() and
dma_unmap_sg() causes performance to drop by a factor of 10.

The right solution for this would be to cache mappings for buffers per
consumer device, but that's a bit involved. Instead, we simply revert to
the old behaviour of sharing IOVA mappings when we know that devices can
do so (i.e. they share the same IOMMU domain).

Cc: <stable@vger.kernel.org> # v5.5
Reported-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Tested-by: Dmitry Osipenko <digetx@gmail.com>
Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
drivers/gpu/drm/tegra/gem.c
drivers/gpu/drm/tegra/plane.c
drivers/gpu/host1x/job.c

index bc15b430156d0452277edddb842e28792aaa9606..c46b4d4190acff70886974066d04eda93f91ece3 100644 (file)
@@ -60,8 +60,16 @@ static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo,
        /*
         * If we've manually mapped the buffer object through the IOMMU, make
         * sure to return the IOVA address of our mapping.
+        *
+        * Similarly, for buffers that have been allocated by the DMA API the
+        * physical address can be used for devices that are not attached to
+        * an IOMMU. For these devices, callers must pass a valid pointer via
+        * the @phys argument.
+        *
+        * Imported buffers were also already mapped at import time, so the
+        * existing mapping can be reused.
         */
-       if (phys && obj->mm) {
+       if (phys) {
                *phys = obj->iova;
                return NULL;
        }
index cadcdd9ea427bf7d16df10b8673130c967ee8579..9ccfb56e9b01f4954e123c57f436df741f125030 100644 (file)
@@ -3,6 +3,8 @@
  * Copyright (C) 2017 NVIDIA CORPORATION.  All rights reserved.
  */
 
+#include <linux/iommu.h>
+
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_fourcc.h>
@@ -107,21 +109,27 @@ const struct drm_plane_funcs tegra_plane_funcs = {
 
 static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state)
 {
+       struct iommu_domain *domain = iommu_get_domain_for_dev(dc->dev);
        unsigned int i;
        int err;
 
        for (i = 0; i < state->base.fb->format->num_planes; i++) {
                struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i);
+               dma_addr_t phys_addr, *phys;
+               struct sg_table *sgt;
 
-               if (!dc->client.group) {
-                       struct sg_table *sgt;
+               if (!domain || dc->client.group)
+                       phys = &phys_addr;
+               else
+                       phys = NULL;
 
-                       sgt = host1x_bo_pin(dc->dev, &bo->base, NULL);
-                       if (IS_ERR(sgt)) {
-                               err = PTR_ERR(sgt);
-                               goto unpin;
-                       }
+               sgt = host1x_bo_pin(dc->dev, &bo->base, phys);
+               if (IS_ERR(sgt)) {
+                       err = PTR_ERR(sgt);
+                       goto unpin;
+               }
 
+               if (sgt) {
                        err = dma_map_sg(dc->dev, sgt->sgl, sgt->nents,
                                         DMA_TO_DEVICE);
                        if (err == 0) {
@@ -143,7 +151,7 @@ static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state)
                        state->iova[i] = sg_dma_address(sgt->sgl);
                        state->sgt[i] = sgt;
                } else {
-                       state->iova[i] = bo->iova;
+                       state->iova[i] = phys_addr;
                }
        }
 
@@ -156,9 +164,11 @@ unpin:
                struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i);
                struct sg_table *sgt = state->sgt[i];
 
-               dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
-               host1x_bo_unpin(dc->dev, &bo->base, sgt);
+               if (sgt)
+                       dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents,
+                                    DMA_TO_DEVICE);
 
+               host1x_bo_unpin(dc->dev, &bo->base, sgt);
                state->iova[i] = DMA_MAPPING_ERROR;
                state->sgt[i] = NULL;
        }
@@ -172,17 +182,13 @@ static void tegra_dc_unpin(struct tegra_dc *dc, struct tegra_plane_state *state)
 
        for (i = 0; i < state->base.fb->format->num_planes; i++) {
                struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i);
+               struct sg_table *sgt = state->sgt[i];
 
-               if (!dc->client.group) {
-                       struct sg_table *sgt = state->sgt[i];
-
-                       if (sgt) {
-                               dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents,
-                                            DMA_TO_DEVICE);
-                               host1x_bo_unpin(dc->dev, &bo->base, sgt);
-                       }
-               }
+               if (sgt)
+                       dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents,
+                                    DMA_TO_DEVICE);
 
+               host1x_bo_unpin(dc->dev, &bo->base, sgt);
                state->iova[i] = DMA_MAPPING_ERROR;
                state->sgt[i] = NULL;
        }
index 25ca54de8fc583fd8891eb7eeb706416222e3f19..0d53c08e9972dd07c976e3c8c21096d519addbe8 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/host1x.h>
+#include <linux/iommu.h>
 #include <linux/kref.h>
 #include <linux/module.h>
 #include <linux/scatterlist.h>
@@ -101,9 +102,11 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 {
        struct host1x_client *client = job->client;
        struct device *dev = client->dev;
+       struct iommu_domain *domain;
        unsigned int i;
        int err;
 
+       domain = iommu_get_domain_for_dev(dev);
        job->num_unpins = 0;
 
        for (i = 0; i < job->num_relocs; i++) {
@@ -117,7 +120,19 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
                        goto unpin;
                }
 
-               if (client->group)
+               /*
+                * If the client device is not attached to an IOMMU, the
+                * physical address of the buffer object can be used.
+                *
+                * Similarly, when an IOMMU domain is shared between all
+                * host1x clients, the IOVA is already available, so no
+                * need to map the buffer object again.
+                *
+                * XXX Note that this isn't always safe to do because it
+                * relies on an assumption that no cache maintenance is
+                * needed on the buffer objects.
+                */
+               if (!domain || client->group)
                        phys = &phys_addr;
                else
                        phys = NULL;
@@ -176,6 +191,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
                dma_addr_t phys_addr;
                unsigned long shift;
                struct iova *alloc;
+               dma_addr_t *phys;
                unsigned int j;
 
                g->bo = host1x_bo_get(g->bo);
@@ -184,7 +200,17 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
                        goto unpin;
                }
 
-               sgt = host1x_bo_pin(host->dev, g->bo, NULL);
+               /**
+                * If the host1x is not attached to an IOMMU, there is no need
+                * to map the buffer object for the host1x, since the physical
+                * address can simply be used.
+                */
+               if (!iommu_get_domain_for_dev(host->dev))
+                       phys = &phys_addr;
+               else
+                       phys = NULL;
+
+               sgt = host1x_bo_pin(host->dev, g->bo, phys);
                if (IS_ERR(sgt)) {
                        err = PTR_ERR(sgt);
                        goto unpin;
@@ -214,7 +240,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 
                        job->unpins[job->num_unpins].size = gather_size;
                        phys_addr = iova_dma_addr(&host->iova, alloc);
-               } else {
+               } else if (sgt) {
                        err = dma_map_sg(host->dev, sgt->sgl, sgt->nents,
                                         DMA_TO_DEVICE);
                        if (!err) {