Merge tag 'md/4.13-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 615f0a855222f630d07311c92dce17d3bd371298..969bac8404f18cb31d4b22da8b0284d42f174541 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
 #include <linux/dma-buf.h>
 
 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
-static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
-static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
 
 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
-       if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+       if (obj->cache_dirty)
                return false;
 
-       if (!i915_gem_object_is_coherent(obj))
+       if (!obj->cache_coherent)
                return true;
 
        return obj->pin_display;
@@ -145,9 +143,9 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
        struct i915_ggtt *ggtt = &dev_priv->ggtt;
        struct drm_i915_gem_get_aperture *args = data;
        struct i915_vma *vma;
-       size_t pinned;
+       u64 pinned;
 
-       pinned = 0;
+       pinned = ggtt->base.reserved;
        mutex_lock(&dev->struct_mutex);
        list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
                if (i915_vma_is_pinned(vma))
@@ -235,6 +233,14 @@ err_phys:
        return st;
 }
 
+static void __start_cpu_write(struct drm_i915_gem_object *obj)
+{
+       obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+       obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+       if (cpu_write_needs_clflush(obj))
+               obj->cache_dirty = true;
+}
+
 static void
 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
                                struct sg_table *pages,
@@ -247,11 +253,10 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 
        if (needs_clflush &&
            (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
-           !i915_gem_object_is_coherent(obj))
+           !obj->cache_coherent)
                drm_clflush_sg(pages);
 
-       obj->base.read_domains = I915_GEM_DOMAIN_CPU;
-       obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+       __start_cpu_write(obj);
 }
 
 static void
@@ -686,6 +691,12 @@ i915_gem_dumb_create(struct drm_file *file,
                               args->size, &args->handle);
 }
 
+static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+       return !(obj->cache_level == I915_CACHE_NONE ||
+                obj->cache_level == I915_CACHE_WT);
+}
+
 /**
  * Creates a new mm object and returns a handle to it.
  * @dev: drm device pointer
@@ -705,6 +716,66 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
                               args->size, &args->handle);
 }
 
+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+       return (domain == I915_GEM_DOMAIN_GTT ?
+               obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
+static void
+flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
+{
+       struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+
+       if (!(obj->base.write_domain & flush_domains))
+               return;
+
+       /* No actual flushing is required for the GTT write domain.  Writes
+        * to it "immediately" go to main memory as far as we know, so there's
+        * no chipset flush.  It also doesn't land in render cache.
+        *
+        * However, we do have to enforce the order so that all writes through
+        * the GTT land before any writes to the device, such as updates to
+        * the GATT itself.
+        *
+        * We also have to wait a bit for the writes to land from the GTT.
+        * An uncached read (i.e. mmio) seems to be ideal for the round-trip
+        * timing. This issue has only been observed when switching quickly
+        * between GTT writes and CPU reads from inside the kernel on recent hw,
+        * and it appears to only affect discrete GTT blocks (i.e. on LLC
+        * system agents we cannot reproduce this behaviour).
+        */
+       wmb();
+
+       switch (obj->base.write_domain) {
+       case I915_GEM_DOMAIN_GTT:
+               if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
+                       if (intel_runtime_pm_get_if_in_use(dev_priv)) {
+                               spin_lock_irq(&dev_priv->uncore.lock);
+                               POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
+                               spin_unlock_irq(&dev_priv->uncore.lock);
+                               intel_runtime_pm_put(dev_priv);
+                       }
+               }
+
+               intel_fb_obj_flush(obj,
+                                  fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+               break;
+
+       case I915_GEM_DOMAIN_CPU:
+               i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+               break;
+
+       case I915_GEM_DOMAIN_RENDER:
+               if (gpu_write_needs_clflush(obj))
+                       obj->cache_dirty = true;
+               break;
+       }
+
+       obj->base.write_domain = 0;
+}
+
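
Reading aid (not part of the patch): the mask passed to the new flush_write_domain() helper is the complement of the domain being entered, so every other write domain is flushed. The call sites later in this diff follow that pattern:

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);	/* entering the CPU domain */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);	/* entering the GTT domain */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);	/* entering the new WC domain */
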
 static inline int
 __copy_to_user_swizzled(char __user *cpu_vaddr,
                        const char *gpu_vaddr, int gpu_offset,
@@ -785,8 +856,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
        if (ret)
                return ret;
 
-       if (i915_gem_object_is_coherent(obj) ||
-           !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+       if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, false);
                if (ret)
                        goto err_unpin;
@@ -794,14 +864,15 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
                        goto out;
        }
 
-       i915_gem_object_flush_gtt_write_domain(obj);
+       flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
        /* If we're not in the cpu read domain, set ourself into the gtt
         * read domain and manually flush cachelines (if required). This
         * optimizes for the case when the gpu will dirty the data
         * anyway again before the next pread happens.
         */
-       if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
+       if (!obj->cache_dirty &&
+           !(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
                *needs_clflush = CLFLUSH_BEFORE;
 
 out:
@@ -837,8 +908,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
        if (ret)
                return ret;
 
-       if (i915_gem_object_is_coherent(obj) ||
-           !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+       if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, true);
                if (ret)
                        goto err_unpin;
@@ -846,21 +916,23 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
                        goto out;
        }
 
-       i915_gem_object_flush_gtt_write_domain(obj);
+       flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
        /* If we're not in the cpu write domain, set ourself into the
         * gtt write domain and manually flush cachelines (as required).
         * This optimizes for the case when the gpu will use the data
         * right away and we therefore have to clflush anyway.
         */
-       if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
+       if (!obj->cache_dirty) {
                *needs_clflush |= CLFLUSH_AFTER;
 
-       /* Same trick applies to invalidate partially written cachelines read
-        * before writing.
-        */
-       if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
-               *needs_clflush |= CLFLUSH_BEFORE;
+               /*
+                * Same trick applies to invalidate partially written
+                * cachelines read before writing.
+                */
+               if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU))
+                       *needs_clflush |= CLFLUSH_BEFORE;
+       }
 
 out:
        intel_fb_obj_invalidate(obj, ORIGIN_CPU);
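
For context, a hedged sketch (not code from this patch) of how a shmem pread/pwrite path typically consumes the needs_clflush flags prepared above: invalidate stale cachelines before touching the page, and write dirty lines back afterwards. drm_clflush_virt_range() and the CLFLUSH_* flags are the existing helpers assumed here; copy_into_page() is a hypothetical name.

	/* Hedged sketch of a pwrite-style consumer of the CLFLUSH_* flags. */
	static int copy_into_page(void *vaddr, unsigned int offset, unsigned int len,
				  const void __user *user, unsigned int needs_clflush)
	{
		if (needs_clflush & CLFLUSH_BEFORE)
			drm_clflush_virt_range(vaddr + offset, len);

		if (copy_from_user(vaddr + offset, user, len))
			return -EFAULT;

		if (needs_clflush & CLFLUSH_AFTER)
			drm_clflush_virt_range(vaddr + offset, len);

		return 0;
	}
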
@@ -1501,13 +1573,6 @@ err:
        return ret;
 }
 
-static inline enum fb_op_origin
-write_origin(struct drm_i915_gem_object *obj, unsigned domain)
-{
-       return (domain == I915_GEM_DOMAIN_GTT ?
-               obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 {
        struct drm_i915_private *i915;
@@ -1591,10 +1656,12 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
        if (err)
                goto out_unpin;
 
-       if (read_domains & I915_GEM_DOMAIN_GTT)
-               err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
+       if (read_domains & I915_GEM_DOMAIN_WC)
+               err = i915_gem_object_set_to_wc_domain(obj, write_domain);
+       else if (read_domains & I915_GEM_DOMAIN_GTT)
+               err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
        else
-               err = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
+               err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
 
        /* And bump the LRU for this access */
        i915_gem_object_bump_inactive_ggtt(obj);
@@ -1602,7 +1669,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
        mutex_unlock(&dev->struct_mutex);
 
        if (write_domain != 0)
-               intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
+               intel_fb_obj_invalidate(obj,
+                                       fb_write_origin(obj, write_domain));
 
 out_unpin:
        i915_gem_object_unpin_pages(obj);
@@ -1737,6 +1805,9 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
  *     into userspace. (This view is aligned and sized appropriately for
  *     fenced access.)
  *
+ * 2 - Recognise WC as a separate cache domain so that we can flush the
+ *     delayed writes via GTT before performing direct access via WC.
+ *
  * Restrictions:
  *
  *  * snoopable objects cannot be accessed via the GTT. It can cause machine
@@ -1764,7 +1835,7 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
  */
 int i915_gem_mmap_gtt_version(void)
 {
-       return 1;
+       return 2;
 }
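
Because the version bump and I915_GEM_DOMAIN_WC are userspace-visible, here is a hedged userspace sketch (not part of this patch) of detecting GTT mmap version 2 and moving a buffer into the WC domain through the existing set-domain ioctl. The constants are assumed to match libdrm's i915_drm.h once this series lands; the helper names are hypothetical.

	#include <stdint.h>
	#include <xf86drm.h>
	#include <i915_drm.h>

	/* Returns the GTT mmap version reported by the kernel, or -1 on error. */
	static int gtt_mmap_version(int fd)
	{
		int value = 0;
		struct drm_i915_getparam gp = {
			.param = I915_PARAM_MMAP_GTT_VERSION,
			.value = &value,
		};

		if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
			return -1;

		return value;
	}

	/* Flush delayed GTT writes by moving the object into the WC domain. */
	static int set_domain_wc(int fd, uint32_t handle)
	{
		struct drm_i915_gem_set_domain sd = {
			.handle = handle,
			.read_domains = I915_GEM_DOMAIN_WC,
			.write_domain = I915_GEM_DOMAIN_WC,
		};

		return drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
	}
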
 
 static inline struct i915_ggtt_view
@@ -2228,7 +2299,7 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
        if (obj->mm.mapping) {
                void *ptr;
 
-               ptr = ptr_mask_bits(obj->mm.mapping);
+               ptr = page_mask_bits(obj->mm.mapping);
                if (is_vmalloc_addr(ptr))
                        vunmap(ptr);
                else
@@ -2315,8 +2386,7 @@ rebuild_st:
         * Fail silently without starting the shrinker
         */
        mapping = obj->base.filp->f_mapping;
-       noreclaim = mapping_gfp_constraint(mapping,
-                                          ~(__GFP_IO | __GFP_RECLAIM));
+       noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
        noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
 
        sg = st->sgl;
@@ -2364,8 +2434,9 @@ rebuild_st:
                                 * again with !__GFP_NORETRY. However, we still
                                 * want to fail this allocation rather than
                                 * trigger the out-of-memory killer and for
-                                * this we want the future __GFP_MAYFAIL.
+                                * this we want __GFP_RETRY_MAYFAIL.
                                 */
+                               gfp |= __GFP_RETRY_MAYFAIL;
                        }
                } while (1);
 
@@ -2524,7 +2595,7 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
 
        if (n_pages > ARRAY_SIZE(stack_pages)) {
                /* Too big for stack -- allocate temporary array instead */
-               pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
+               pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_TEMPORARY);
                if (!pages)
                        return NULL;
        }
@@ -2546,7 +2617,7 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
        addr = vmap(pages, n_pages, 0, pgprot);
 
        if (pages != stack_pages)
-               drm_free_large(pages);
+               kvfree(pages);
 
        return addr;
 }
@@ -2580,7 +2651,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
        }
        GEM_BUG_ON(!obj->mm.pages);
 
-       ptr = ptr_unpack_bits(obj->mm.mapping, has_type);
+       ptr = page_unpack_bits(obj->mm.mapping, &has_type);
        if (ptr && has_type != type) {
                if (pinned) {
                        ret = -EBUSY;
@@ -2602,7 +2673,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
                        goto err_unpin;
                }
 
-               obj->mm.mapping = ptr_pack_bits(ptr, type);
+               obj->mm.mapping = page_pack_bits(ptr, type);
        }
 
 out_unlock:
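
The page_pack_bits()/page_unpack_bits() helpers used above keep the mapping type in the spare low bits of the page-aligned mapping pointer. A minimal generic sketch of that pointer-tagging idea (hypothetical names, not the i915 macros):

	#include <assert.h>
	#include <stdint.h>

	#define TAG_BITS	2u
	#define TAG_MASK	((1u << TAG_BITS) - 1)

	/* A page-aligned pointer has zero low bits, so a small tag fits there. */
	static inline void *pack_tag(void *ptr, unsigned int tag)
	{
		assert(((uintptr_t)ptr & TAG_MASK) == 0 && tag <= TAG_MASK);
		return (void *)((uintptr_t)ptr | tag);
	}

	/* Recover both the original pointer and the tag. */
	static inline void *unpack_tag(void *packed, unsigned int *tag)
	{
		*tag = (uintptr_t)packed & TAG_MASK;
		return (void *)((uintptr_t)packed & ~(uintptr_t)TAG_MASK);
	}
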
@@ -2987,12 +3058,14 @@ static void engine_set_wedged(struct intel_engine_cs *engine)
         */
 
        if (i915.enable_execlists) {
+               struct execlist_port *port = engine->execlist_port;
                unsigned long flags;
+               unsigned int n;
 
                spin_lock_irqsave(&engine->timeline->lock, flags);
 
-               i915_gem_request_put(engine->execlist_port[0].request);
-               i915_gem_request_put(engine->execlist_port[1].request);
+               for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
+                       i915_gem_request_put(port_request(&port[n]));
                memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
                engine->execlist_queue = RB_ROOT;
                engine->execlist_first = NULL;
@@ -3121,8 +3194,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
        struct drm_i915_private *dev_priv =
                container_of(work, typeof(*dev_priv), gt.idle_work.work);
        struct drm_device *dev = &dev_priv->drm;
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
        bool rearm_hangcheck;
 
        if (!READ_ONCE(dev_priv->gt.awake))
@@ -3160,10 +3231,8 @@ i915_gem_idle_work_handler(struct work_struct *work)
        if (wait_for(intel_engines_are_idle(dev_priv), 10))
                DRM_ERROR("Timeout waiting for engines to idle\n");
 
-       for_each_engine(engine, dev_priv, id) {
-               intel_engine_disarm_breadcrumbs(engine);
-               i915_gem_batch_pool_fini(&engine->batch_pool);
-       }
+       intel_engines_mark_idle(dev_priv);
+       i915_gem_timelines_mark_idle(dev_priv);
 
        GEM_BUG_ON(!dev_priv->gt.awake);
        dev_priv->gt.awake = false;
@@ -3193,6 +3262,10 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
                if (vma->vm->file == fpriv)
                        i915_vma_close(vma);
 
+       vma = obj->vma_hashed;
+       if (vma && vma->ctx->file_priv == fpriv)
+               i915_vma_unlink_ctx(vma);
+
        if (i915_gem_object_is_active(obj) &&
            !i915_gem_object_has_active_reference(obj)) {
                i915_gem_object_set_active_reference(obj);
@@ -3344,73 +3417,89 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
        return ret;
 }
 
-/** Flushes the GTT write domain for the object if it's dirty. */
-static void
-i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
+static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
 {
-       struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-
-       if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
-               return;
-
-       /* No actual flushing is required for the GTT write domain.  Writes
-        * to it "immediately" go to main memory as far as we know, so there's
-        * no chipset flush.  It also doesn't land in render cache.
-        *
-        * However, we do have to enforce the order so that all writes through
-        * the GTT land before any writes to the device, such as updates to
-        * the GATT itself.
-        *
-        * We also have to wait a bit for the writes to land from the GTT.
-        * An uncached read (i.e. mmio) seems to be ideal for the round-trip
-        * timing. This issue has only been observed when switching quickly
-        * between GTT writes and CPU reads from inside the kernel on recent hw,
-        * and it appears to only affect discrete GTT blocks (i.e. on LLC
-        * system agents we cannot reproduce this behaviour).
+       /*
+        * We manually flush the CPU domain so that we can override and
+        * force the flush for the display, and perform it asynchronously.
         */
-       wmb();
-       if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
-               if (intel_runtime_pm_get_if_in_use(dev_priv)) {
-                       spin_lock_irq(&dev_priv->uncore.lock);
-                       POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
-                       spin_unlock_irq(&dev_priv->uncore.lock);
-                       intel_runtime_pm_put(dev_priv);
-               }
-       }
-
-       intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT));
-
+       flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+       if (obj->cache_dirty)
+               i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
        obj->base.write_domain = 0;
 }
 
-/** Flushes the CPU write domain for the object if it's dirty. */
-static void
-i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
+void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
 {
-       if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
+       if (!READ_ONCE(obj->pin_display))
                return;
 
-       i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
-       obj->base.write_domain = 0;
+       mutex_lock(&obj->base.dev->struct_mutex);
+       __i915_gem_object_flush_for_display(obj);
+       mutex_unlock(&obj->base.dev->struct_mutex);
 }
 
-static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
+/**
+ * Moves a single object to the WC read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 {
-       if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty)
-               return;
+       int ret;
 
-       i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
-       obj->base.write_domain = 0;
-}
+       lockdep_assert_held(&obj->base.dev->struct_mutex);
 
-void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
-{
-       if (!READ_ONCE(obj->pin_display))
-               return;
+       ret = i915_gem_object_wait(obj,
+                                  I915_WAIT_INTERRUPTIBLE |
+                                  I915_WAIT_LOCKED |
+                                  (write ? I915_WAIT_ALL : 0),
+                                  MAX_SCHEDULE_TIMEOUT,
+                                  NULL);
+       if (ret)
+               return ret;
 
-       mutex_lock(&obj->base.dev->struct_mutex);
-       __i915_gem_object_flush_for_display(obj);
-       mutex_unlock(&obj->base.dev->struct_mutex);
+       if (obj->base.write_domain == I915_GEM_DOMAIN_WC)
+               return 0;
+
+       /* Flush and acquire obj->pages so that we are coherent through
+        * direct access in memory with previous cached writes through
+        * shmemfs and that our cache domain tracking remains valid.
+        * For example, if the obj->filp was moved to swap without us
+        * being notified and releasing the pages, we would mistakenly
+        * continue to assume that the obj remained out of the CPU cached
+        * domain.
+        */
+       ret = i915_gem_object_pin_pages(obj);
+       if (ret)
+               return ret;
+
+       flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+
+       /* Serialise direct access to this object with the barriers for
+        * coherent writes from the GPU, by effectively invalidating the
+        * WC domain upon first access.
+        */
+       if ((obj->base.read_domains & I915_GEM_DOMAIN_WC) == 0)
+               mb();
+
+       /* It should now be out of any other write domains, and we can update
+        * the domain values for our changes.
+        */
+       GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_WC) != 0);
+       obj->base.read_domains |= I915_GEM_DOMAIN_WC;
+       if (write) {
+               obj->base.read_domains = I915_GEM_DOMAIN_WC;
+               obj->base.write_domain = I915_GEM_DOMAIN_WC;
+               obj->mm.dirty = true;
+       }
+
+       i915_gem_object_unpin_pages(obj);
+       return 0;
 }
 
 /**
@@ -3452,7 +3541,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
        if (ret)
                return ret;
 
-       i915_gem_object_flush_cpu_write_domain(obj);
+       flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
 
        /* Serialise direct access to this object with the barriers for
         * coherent writes from the GPU, by effectively invalidating the
@@ -3595,13 +3684,11 @@ restart:
                }
        }
 
-       if (obj->base.write_domain == I915_GEM_DOMAIN_CPU &&
-           i915_gem_object_is_coherent(obj))
-               obj->cache_dirty = true;
-
        list_for_each_entry(vma, &obj->vma_list, obj_link)
                vma->node.color = cache_level;
        obj->cache_level = cache_level;
+       obj->cache_coherent = i915_gem_object_is_coherent(obj);
+       obj->cache_dirty = true; /* Always invalidate stale cachelines */
 
        return 0;
 }
@@ -3823,10 +3910,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
        if (ret)
                return ret;
 
-       if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
-               return 0;
-
-       i915_gem_object_flush_gtt_write_domain(obj);
+       flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
 
        /* Flush the CPU cache if it's still invalid. */
        if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
@@ -3837,15 +3921,13 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
        /* It should now be out of any other write domains, and we can update
         * the domain values for our changes.
         */
-       GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
+       GEM_BUG_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
 
        /* If we're writing through the CPU, then the GPU read domains will
         * need to be invalidated at next use.
         */
-       if (write) {
-               obj->base.read_domains = I915_GEM_DOMAIN_CPU;
-               obj->base.write_domain = I915_GEM_DOMAIN_CPU;
-       }
+       if (write)
+               __start_cpu_write(obj);
 
        return 0;
 }
@@ -4020,7 +4102,7 @@ __busy_set_if_active(const struct dma_fence *fence,
        if (i915_gem_request_completed(rq))
                return 0;
 
-       return flag(rq->engine->exec_id);
+       return flag(rq->engine->uabi_id);
 }
 
 static __always_inline unsigned int
@@ -4177,7 +4259,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
        INIT_LIST_HEAD(&obj->global_link);
        INIT_LIST_HEAD(&obj->userfault_link);
-       INIT_LIST_HEAD(&obj->obj_exec_link);
        INIT_LIST_HEAD(&obj->vma_list);
        INIT_LIST_HEAD(&obj->batch_pool_link);
 
@@ -4219,7 +4300,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
         * catch if we ever need to fix it. In the meantime, if you do spot
         * such a local variable, please consider fixing!
         */
-       if (WARN_ON(size >> PAGE_SHIFT > INT_MAX))
+       if (size >> PAGE_SHIFT > INT_MAX)
                return ERR_PTR(-E2BIG);
 
        if (overflows_type(size, obj->base.size))
@@ -4266,6 +4347,9 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
        } else
                obj->cache_level = I915_CACHE_NONE;
 
+       obj->cache_coherent = i915_gem_object_is_coherent(obj);
+       obj->cache_dirty = !obj->cache_coherent;
+
        trace_i915_gem_object_create(obj);
 
        return obj;
@@ -4314,7 +4398,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
                GEM_BUG_ON(i915_gem_object_is_active(obj));
                list_for_each_entry_safe(vma, vn,
                                         &obj->vma_list, obj_link) {
-                       GEM_BUG_ON(!i915_vma_is_ggtt(vma));
                        GEM_BUG_ON(i915_vma_is_active(vma));
                        vma->flags &= ~I915_VMA_PIN_MASK;
                        i915_vma_close(vma);
@@ -4327,6 +4410,8 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
        intel_runtime_pm_put(i915);
        mutex_unlock(&i915->drm.struct_mutex);
 
+       cond_resched();
+
        llist_for_each_entry_safe(obj, on, freed, freed) {
                GEM_BUG_ON(obj->bind_count);
                GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
@@ -4374,8 +4459,11 @@ static void __i915_gem_free_work(struct work_struct *work)
         * unbound now.
         */
 
-       while ((freed = llist_del_all(&i915->mm.free_list)))
+       while ((freed = llist_del_all(&i915->mm.free_list))) {
                __i915_gem_free_objects(i915, freed);
+               if (need_resched())
+                       break;
+       }
 }
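
The loop above drains a lock-free llist that the object-free path feeds from other contexts. A hedged sketch of the producer side of this deferred-free pattern (hypothetical names; the i915 producer is not part of this hunk):

	#include <linux/llist.h>
	#include <linux/slab.h>
	#include <linux/workqueue.h>

	struct deferred_obj {
		struct llist_node freed;
	};

	static void free_work_fn(struct work_struct *work);
	static LLIST_HEAD(free_list);
	static DECLARE_WORK(free_work, free_work_fn);

	static void queue_deferred_free(struct deferred_obj *obj)
	{
		/* llist_add() returns true only for the first entry on an empty list */
		if (llist_add(&obj->freed, &free_list))
			schedule_work(&free_work);
	}

	static void free_work_fn(struct work_struct *work)
	{
		struct llist_node *freed = llist_del_all(&free_list);
		struct deferred_obj *obj, *next;

		llist_for_each_entry_safe(obj, next, freed, freed)
			kfree(obj);	/* stand-in for the real teardown */
	}
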
 
 static void __i915_gem_free_object_rcu(struct rcu_head *head)
@@ -4440,10 +4528,9 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
         * try to take over. The only way to remove the earlier state
         * is by resetting. However, resetting on earlier gen is tricky as
         * it may impact the display and we are uncertain about the stability
-        * of the reset, so we only reset recent machines with logical
-        * context support (that must be reset to remove any stray contexts).
+        * of the reset, so this could be applied to even earlier gen.
         */
-       if (HAS_HW_CONTEXTS(i915)) {
+       if (INTEL_GEN(i915) >= 5) {
                int reset = intel_gpu_reset(i915, ALL_ENGINES);
                WARN_ON(reset && reset != -ENODEV);
        }
@@ -4686,11 +4773,9 @@ bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
        if (value >= 0)
                return value;
 
-#ifdef CONFIG_INTEL_IOMMU
        /* Enable semaphores on SNB when IO remapping is off */
-       if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
+       if (IS_GEN6(dev_priv) && intel_vtd_active())
                return false;
-#endif
 
        return true;
 }
@@ -4701,7 +4786,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 
        mutex_lock(&dev_priv->drm.struct_mutex);
 
-       i915_gem_clflush_init(dev_priv);
+       dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);
 
        if (!i915.enable_execlists) {
                dev_priv->gt.resume = intel_legacy_submission_resume;
@@ -4719,7 +4804,9 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
         */
        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 
-       i915_gem_init_userptr(dev_priv);
+       ret = i915_gem_init_userptr(dev_priv);
+       if (ret)
+               goto out_unlock;
 
        ret = i915_gem_init_ggtt(dev_priv);
        if (ret)
@@ -4824,12 +4911,16 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
        if (!dev_priv->dependencies)
                goto err_requests;
 
+       dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
+       if (!dev_priv->priorities)
+               goto err_dependencies;
+
        mutex_lock(&dev_priv->drm.struct_mutex);
        INIT_LIST_HEAD(&dev_priv->gt.timelines);
        err = i915_gem_timeline_init__global(dev_priv);
        mutex_unlock(&dev_priv->drm.struct_mutex);
        if (err)
-               goto err_dependencies;
+               goto err_priorities;
 
        INIT_LIST_HEAD(&dev_priv->context_list);
        INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
@@ -4847,14 +4938,14 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 
        init_waitqueue_head(&dev_priv->pending_flip_queue);
 
-       dev_priv->mm.interruptible = true;
-
        atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
 
        spin_lock_init(&dev_priv->fb_tracking.lock);
 
        return 0;
 
+err_priorities:
+       kmem_cache_destroy(dev_priv->priorities);
 err_dependencies:
        kmem_cache_destroy(dev_priv->dependencies);
 err_requests:
@@ -4878,6 +4969,7 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
        WARN_ON(!list_empty(&dev_priv->gt.timelines));
        mutex_unlock(&dev_priv->drm.struct_mutex);
 
+       kmem_cache_destroy(dev_priv->priorities);
        kmem_cache_destroy(dev_priv->dependencies);
        kmem_cache_destroy(dev_priv->requests);
        kmem_cache_destroy(dev_priv->vmas);
@@ -4889,9 +4981,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
 
 int i915_gem_freeze(struct drm_i915_private *dev_priv)
 {
-       mutex_lock(&dev_priv->drm.struct_mutex);
+       /* Discard all purgeable objects, let userspace recover those as
+        * required after resuming.
+        */
        i915_gem_shrink_all(dev_priv);
-       mutex_unlock(&dev_priv->drm.struct_mutex);
 
        return 0;
 }
@@ -4916,17 +5009,16 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
         * we update that state just before writing out the image.
         *
         * To try and reduce the hibernation image, we manually shrink
-        * the objects as well.
+        * the objects as well, see i915_gem_freeze()
         */
 
-       mutex_lock(&dev_priv->drm.struct_mutex);
        i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND);
+       i915_gem_drain_freed_objects(dev_priv);
 
+       mutex_lock(&dev_priv->drm.struct_mutex);
        for (p = phases; *p; p++) {
-               list_for_each_entry(obj, *p, global_link) {
-                       obj->base.read_domains = I915_GEM_DOMAIN_CPU;
-                       obj->base.write_domain = I915_GEM_DOMAIN_CPU;
-               }
+               list_for_each_entry(obj, *p, global_link)
+                       __start_cpu_write(obj);
        }
        mutex_unlock(&dev_priv->drm.struct_mutex);