Merge tag 'drm-intel-next-2019-02-07' of git://anongit.freedesktop.org/drm/drm-intel...

[sfrench/cifs-2.6.git] / drivers / gpu / drm / i915 / i915_request.c
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c

index a076fd0b7ba654dbc05588f7cdc02d156fa28414..c2a5c48c7541d6d1bb230933748b210ff036bd78 100644 (file)
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -29,6 +29,7 @@
  #include <linux/sched/signal.h>
  
  #include "i915_drv.h"
+#include "i915_active.h"
  #include "i915_reset.h"
  
  static const char *i915_fence_get_driver_name(struct dma_fence *fence)
@@ -60,7 +61,7 @@ static bool i915_fence_signaled(struct dma_fence *fence)
  
  static bool i915_fence_enable_signaling(struct dma_fence *fence)
  {
-       return intel_engine_enable_signaling(to_request(fence), true);
+       return i915_request_enable_breadcrumb(to_request(fence));
  }
  
  static signed long i915_fence_wait(struct dma_fence *fence,
@@ -125,12 +126,6 @@ static void unreserve_gt(struct drm_i915_private *i915)
                 i915_gem_park(i915);
  }
  
-void i915_gem_retire_noop(struct i915_gem_active *active,
-                         struct i915_request *request)
-{
-       /* Space left intentionally blank */
-}
-
  static void advance_ring(struct i915_request *request)
  {
         struct intel_ring *ring = request->ring;
@@ -199,10 +194,11 @@ static void __retire_engine_request(struct intel_engine_cs *engine,
         spin_unlock(&engine->timeline.lock);
  
         spin_lock(&rq->lock);
+       i915_request_mark_complete(rq);
         if (!i915_request_signaled(rq))
                 dma_fence_signal_locked(&rq->fence);
         if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
-               intel_engine_cancel_signaling(rq);
+               i915_request_cancel_breadcrumb(rq);
         if (rq->waitboost) {
                 GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
                 atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
@@ -243,7 +239,7 @@ static void __retire_engine_upto(struct intel_engine_cs *engine,
  
  static void i915_request_retire(struct i915_request *request)
  {
-       struct i915_gem_active *active, *next;
+       struct i915_active_request *active, *next;
  
         GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
                   request->engine->name,
@@ -277,10 +273,10 @@ static void i915_request_retire(struct i915_request *request)
                  * we may spend an inordinate amount of time simply handling
                  * the retirement of requests and processing their callbacks.
                  * Of which, this loop itself is particularly hot due to the
-                * cache misses when jumping around the list of i915_gem_active.
-                * So we try to keep this loop as streamlined as possible and
-                * also prefetch the next i915_gem_active to try and hide
-                * the likely cache miss.
+                * cache misses when jumping around the list of
+                * i915_active_request.  So we try to keep this loop as
+                * streamlined as possible and also prefetch the next
+                * i915_active_request to try and hide the likely cache miss.
                  */
                 prefetchw(next);
  
@@ -332,7 +328,7 @@ void i915_request_retire_upto(struct i915_request *rq)
  
  static u32 timeline_get_seqno(struct i915_timeline *tl)
  {
-       return ++tl->seqno;
+       return tl->seqno += 1 + tl->has_initial_breadcrumb;
  }
  
  static void move_to_timeline(struct i915_request *request,
@@ -376,20 +372,21 @@ void __i915_request_submit(struct i915_request *request)
  
         /* We may be recursing from the signal callback of another i915 fence */
         spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+       GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
+       set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
         request->global_seqno = seqno;
-       if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
-               intel_engine_enable_signaling(request, false);
+       if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
+           !i915_request_enable_breadcrumb(request))
+               intel_engine_queue_breadcrumbs(engine);
         spin_unlock(&request->lock);
  
-       engine->emit_breadcrumb(request,
-                               request->ring->vaddr + request->postfix);
+       engine->emit_fini_breadcrumb(request,
+                                    request->ring->vaddr + request->postfix);
  
         /* Transfer from per-context onto the global per-engine timeline */
         move_to_timeline(request, &engine->timeline);
  
         trace_i915_request_execute(request);
-
-       wake_up_all(&request->execute);
  }
  
  void i915_request_submit(struct i915_request *request)
@@ -432,7 +429,9 @@ void __i915_request_unsubmit(struct i915_request *request)
         spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
         request->global_seqno = 0;
         if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
-               intel_engine_cancel_signaling(request);
+               i915_request_cancel_breadcrumb(request);
+       GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
+       clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
         spin_unlock(&request->lock);
  
         /* Transfer back from the global per-engine timeline to per-context */
@@ -522,6 +521,11 @@ out:
         return kmem_cache_alloc(ce->gem_context->i915->requests, GFP_KERNEL);
  }
  
+static int add_timeline_barrier(struct i915_request *rq)
+{
+       return i915_request_await_active_request(rq, &rq->timeline->barrier);
+}
+
  /**
   * i915_request_alloc - allocate a request structure
   *
@@ -578,7 +582,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
          * We use RCU to look up requests in flight. The lookups may
          * race with the request being allocated from the slab freelist.
          * That is the request we are writing to here, may be in the process
-        * of being read by __i915_gem_active_get_rcu(). As such,
+        * of being read by __i915_active_request_get_rcu(). As such,
          * we have to be very careful when overwriting the contents. During
          * the RCU lookup, we chase the request->engine pointer,
          * read the request->global_seqno and increment the reference count.
@@ -621,7 +625,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
         rq->ring = ce->ring;
         rq->timeline = ce->ring->timeline;
         GEM_BUG_ON(rq->timeline == &engine->timeline);
-       rq->hwsp_seqno = &engine->status_page.addr[I915_GEM_HWS_INDEX];
+       rq->hwsp_seqno = rq->timeline->hwsp_seqno;
  
         spin_lock_init(&rq->lock);
         dma_fence_init(&rq->fence,
@@ -632,13 +636,11 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
  
         /* We bump the ref for the fence chain */
         i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
-       init_waitqueue_head(&rq->execute);
  
         i915_sched_node_init(&rq->sched);
  
         /* No zalloc, must clear what we need by hand */
         rq->global_seqno = 0;
-       rq->signaling.wait.seqno = 0;
         rq->file_priv = NULL;
         rq->batch = NULL;
         rq->capture_list = NULL;
@@ -656,7 +658,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
          * around inside i915_request_add() there is sufficient space at
          * the beginning of the ring as well.
          */
-       rq->reserved_space = 2 * engine->emit_breadcrumb_dw * sizeof(u32);
+       rq->reserved_space = 2 * engine->emit_fini_breadcrumb_dw * sizeof(u32);
  
         /*
          * Record the position of the start of the request so that
@@ -666,6 +668,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
          */
         rq->head = rq->ring->emit;
  
+       ret = add_timeline_barrier(rq);
+       if (ret)
+               goto err_unwind;
+
         ret = engine->request_alloc(rq);
         if (ret)
                 goto err_unwind;
@@ -907,7 +913,7 @@ void i915_request_add(struct i915_request *request)
          * GPU processing the request, we never over-estimate the
          * position of the ring's HEAD.
          */
-       cs = intel_ring_begin(request, engine->emit_breadcrumb_dw);
+       cs = intel_ring_begin(request, engine->emit_fini_breadcrumb_dw);
         GEM_BUG_ON(IS_ERR(cs));
         request->postfix = intel_ring_offset(request, cs);
  
@@ -918,8 +924,8 @@ void i915_request_add(struct i915_request *request)
          * see a more recent value in the hws than we are tracking.
          */
  
-       prev = i915_gem_active_raw(&timeline->last_request,
-                                  &request->i915->drm.struct_mutex);
+       prev = i915_active_request_raw(&timeline->last_request,
+                                      &request->i915->drm.struct_mutex);
         if (prev && !i915_request_completed(prev)) {
                 i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
                                              &request->submitq);
@@ -935,7 +941,7 @@ void i915_request_add(struct i915_request *request)
         spin_unlock_irq(&timeline->lock);
  
         GEM_BUG_ON(timeline->seqno != request->fence.seqno);
-       i915_gem_active_set(&timeline->last_request, request);
+       __i915_active_request_set(&timeline->last_request, request);
  
         list_add_tail(&request->ring_link, &ring->request_list);
         if (list_is_first(&request->ring_link, &ring->request_list)) {
@@ -966,7 +972,7 @@ void i915_request_add(struct i915_request *request)
                  * Allow interactive/synchronous clients to jump ahead of
                  * the bulk clients. (FQ_CODEL)
                  */
-               if (!prev || i915_request_completed(prev))
+               if (list_empty(&request->sched.signalers_list))
                         attr.priority |= I915_PRIORITY_NEWCLIENT;
  
                 engine->schedule(request, &attr);
@@ -1029,13 +1035,10 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
         return this_cpu != cpu;
  }
  
-static bool __i915_spin_request(const struct i915_request *rq,
-                               u32 seqno, int state, unsigned long timeout_us)
+static bool __i915_spin_request(const struct i915_request * const rq,
+                               int state, unsigned long timeout_us)
  {
-       struct intel_engine_cs *engine = rq->engine;
-       unsigned int irq, cpu;
-
-       GEM_BUG_ON(!seqno);
+       unsigned int cpu;
  
         /*
          * Only wait for the request if we know it is likely to complete.
@@ -1043,12 +1046,12 @@ static bool __i915_spin_request(const struct i915_request *rq,
          * We don't track the timestamps around requests, nor the average
          * request length, so we do not have a good indicator that this
          * request will complete within the timeout. What we do know is the
-        * order in which requests are executed by the engine and so we can
-        * tell if the request has started. If the request hasn't started yet,
-        * it is a fair assumption that it will not complete within our
-        * relatively short timeout.
+        * order in which requests are executed by the context and so we can
+        * tell if the request has been started. If the request is not even
+        * running yet, it is a fair assumption that it will not complete
+        * within our relatively short timeout.
          */
-       if (!intel_engine_has_started(engine, seqno))
+       if (!i915_request_is_running(rq))
                 return false;
  
         /*
@@ -1062,20 +1065,10 @@ static bool __i915_spin_request(const struct i915_request *rq,
          * takes to sleep on a request, on the order of a microsecond.
          */
  
-       irq = READ_ONCE(engine->breadcrumbs.irq_count);
         timeout_us += local_clock_us(&cpu);
         do {
-               if (intel_engine_has_completed(engine, seqno))
-                       return seqno == i915_request_global_seqno(rq);
-
-               /*
-                * Seqno are meant to be ordered *before* the interrupt. If
-                * we see an interrupt without a corresponding seqno advance,
-                * assume we won't see one in the near future but require
-                * the engine->seqno_barrier() to fixup coherency.
-                */
-               if (READ_ONCE(engine->breadcrumbs.irq_count) != irq)
-                       break;
+               if (i915_request_completed(rq))
+                       return true;
  
                 if (signal_pending_state(state, current))
                         break;
@@ -1089,6 +1082,18 @@ static bool __i915_spin_request(const struct i915_request *rq,
         return false;
  }
  
+struct request_wait {
+       struct dma_fence_cb cb;
+       struct task_struct *tsk;
+};
+
+static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+       struct request_wait *wait = container_of(cb, typeof(*wait), cb);
+
+       wake_up_process(wait->tsk);
+}
+
  /**
   * i915_request_wait - wait until execution of request has finished
   * @rq: the request to wait upon
@@ -1114,8 +1119,7 @@ long i915_request_wait(struct i915_request *rq,
  {
         const int state = flags & I915_WAIT_INTERRUPTIBLE ?
                 TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
-       DEFINE_WAIT_FUNC(exec, default_wake_function);
-       struct intel_wait wait;
+       struct request_wait wait;
  
         might_sleep();
         GEM_BUG_ON(timeout < 0);
@@ -1127,47 +1131,24 @@ long i915_request_wait(struct i915_request *rq,
                 return -ETIME;
  
         trace_i915_request_wait_begin(rq, flags);
-       add_wait_queue(&rq->execute, &exec);
-       intel_wait_init(&wait);
-       if (flags & I915_WAIT_PRIORITY)
-               i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
-
-restart:
-       do {
-               set_current_state(state);
-               if (intel_wait_update_request(&wait, rq))
-                       break;
-
-               if (signal_pending_state(state, current)) {
-                       timeout = -ERESTARTSYS;
-                       goto complete;
-               }
  
-               if (!timeout) {
-                       timeout = -ETIME;
-                       goto complete;
-               }
+       /* Optimistic short spin before touching IRQs */
+       if (__i915_spin_request(rq, state, 5))
+               goto out;
  
-               timeout = io_schedule_timeout(timeout);
-       } while (1);
+       if (flags & I915_WAIT_PRIORITY)
+               i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
  
-       GEM_BUG_ON(!intel_wait_has_seqno(&wait));
-       GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+       wait.tsk = current;
+       if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
+               goto out;
  
-       /* Optimistic short spin before touching IRQs */
-       if (__i915_spin_request(rq, wait.seqno, state, 5))
-               goto complete;
+       for (;;) {
+               set_current_state(state);
  
-       set_current_state(state);
-       if (intel_engine_add_wait(rq->engine, &wait))
-               /*
-                * In order to check that we haven't missed the interrupt
-                * as we enabled it, we need to kick ourselves to do a
-                * coherent check on the seqno before we sleep.
-                */
-               goto wakeup;
+               if (i915_request_completed(rq))
+                       break;
  
-       for (;;) {
                 if (signal_pending_state(state, current)) {
                         timeout = -ERESTARTSYS;
                         break;
@@ -1179,33 +1160,13 @@ restart:
                 }
  
                 timeout = io_schedule_timeout(timeout);
-
-               if (intel_wait_complete(&wait) &&
-                   intel_wait_check_request(&wait, rq))
-                       break;
-
-               set_current_state(state);
-
-wakeup:
-               if (i915_request_completed(rq))
-                       break;
-
-               /* Only spin if we know the GPU is processing this request */
-               if (__i915_spin_request(rq, wait.seqno, state, 2))
-                       break;
-
-               if (!intel_wait_check_request(&wait, rq)) {
-                       intel_engine_remove_wait(rq->engine, &wait);
-                       goto restart;
-               }
         }
-
-       intel_engine_remove_wait(rq->engine, &wait);
-complete:
         __set_current_state(TASK_RUNNING);
-       remove_wait_queue(&rq->execute, &exec);
-       trace_i915_request_wait_end(rq);
  
+       dma_fence_remove_callback(&rq->fence, &wait.cb);
+
+out:
+       trace_i915_request_wait_end(rq);
         return timeout;
  }