drm/i915/bdw: New logical ring submission mechanism
author Oscar Mateo <oscar.mateo@intel.com>
Thu, 24 Jul 2014 16:04:26 +0000 (17:04 +0100)
committer Daniel Vetter <daniel.vetter@ffwll.ch>
Mon, 11 Aug 2014 20:42:36 +0000 (22:42 +0200)
Well, new-ish: if all this code looks familiar, that's because it's
a clone of the existing submission mechanism (with some modifications
here and there to adapt it to LRCs and Execlists).

And why did we do this instead of reusing code, one might wonder?
Well, there are some fears that the differences are big enough that
they will end up breaking all platforms.

Also, Execlists offer several advantages, like control over when the
GPU is done with a given workload, that can help simplify the
submission mechanism, no doubt. I am interested in getting Execlists
to work first and foremost, but in the future this parallel submission
mechanism will help us to fine tune the mechanism without affecting
old gens.

v2: Pass the ringbuffer only (whenever possible).

Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
[danvet: Appease checkpatch. Again. And drop the legacy sarea gunk
that somehow crept in.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_lrc.h
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/i915/intel_ringbuffer.h

index c9518c6261dee844b4011e8c32858f0c73b1def3..31025847d68060c2c1a1c7fa113a120bdc0897a1 100644 (file)
@@ -108,6 +108,195 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring)
        /* TODO */
 }
 
+void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
+{
+       intel_logical_ring_advance(ringbuf);
+       /*
+        * The tail has been wrapped by the call above. Nothing more is
+        * done when the engine's flag is set in gpu_error.stop_rings,
+        * which is what intel_ring_stopped() tests.
+        */
+       if (intel_ring_stopped(ringbuf->ring))
+               return;
+
+       /* TODO: how to submit a context to the ELSP is not here yet, so
+        * for now this function ends without submitting anything. */
+}
+/*
+ * Make sure the engine has an outstanding lazy seqno and a preallocated
+ * request.
+ *
+ * Returns 0 straight away when ring->outstanding_lazy_seqno is already
+ * non-zero. Otherwise a request is kmalloc'ed (unless one is already
+ * stored in ring->preallocated_lazy_request) and the result of
+ * i915_gem_get_seqno() is returned. Returns -ENOMEM if the allocation
+ * fails.
+ */
+static int logical_ring_alloc_seqno(struct intel_engine_cs *ring)
+{
+       if (ring->outstanding_lazy_seqno)
+               return 0;
+
+       if (ring->preallocated_lazy_request == NULL) {
+               struct drm_i915_gem_request *request;
+
+               request = kmalloc(sizeof(*request), GFP_KERNEL);
+               if (request == NULL)
+                       return -ENOMEM;
+               /* The memory is not zeroed here. NOTE(review): presumably
+                * whoever consumes the request initialises it - confirm. */
+               ring->preallocated_lazy_request = request;
+       }
+
+       return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
+}
+/*
+ * Try to obtain at least @bytes of free space in @ringbuf by waiting for
+ * a request that is already on the engine's request list.
+ *
+ * A stored ringbuf->last_retired_head (anything other than -1) is
+ * consumed first. If that is not enough, ring->request_list is walked for
+ * the first request whose tail, used as the head, would leave >= @bytes
+ * free; its seqno is waited upon and requests are then retired.
+ *
+ * Returns 0 on success, -ENOSPC when no request on the list would free
+ * enough space, or the error returned by i915_wait_seqno().
+ */
+static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
+                                    int bytes)
+{
+       struct intel_engine_cs *ring = ringbuf->ring;
+       struct drm_i915_gem_request *request;
+       u32 seqno = 0;
+       int ret;
+       /* A stored last_retired_head (-1 means none) may already suffice. */
+       if (ringbuf->last_retired_head != -1) {
+               ringbuf->head = ringbuf->last_retired_head;
+               ringbuf->last_retired_head = -1;
+
+               ringbuf->space = intel_ring_space(ringbuf);
+               if (ringbuf->space >= bytes)
+                       return 0;
+       }
+       /* Pick the first request whose tail, taken as the new head, would
+        * leave at least @bytes free. */
+       list_for_each_entry(request, &ring->request_list, list) {
+               if (__intel_ring_space(request->tail, ringbuf->tail,
+                                      ringbuf->size) >= bytes) {
+                       seqno = request->seqno;
+                       break;
+               }
+       }
+       /* seqno is still 0 when the loop found nothing suitable.
+        * NOTE(review): relies on 0 never being a real seqno - confirm. */
+       if (seqno == 0)
+               return -ENOSPC;
+
+       ret = i915_wait_seqno(ring, seqno);
+       if (ret)
+               return ret;
+
+       /* TODO: make sure we update the right ringbuffer's last_retired_head
+        * when retiring requests.
+        * NOTE(review): head is overwritten unconditionally below; if the
+        * retire did not set this ringbuf's last_retired_head it is still
+        * -1 at that point - confirm this cannot happen. */
+       i915_gem_retire_requests_ring(ring);
+       ringbuf->head = ringbuf->last_retired_head;
+       ringbuf->last_retired_head = -1;
+
+       ringbuf->space = intel_ring_space(ringbuf);
+       return 0;
+}
+/*
+ * Wait until at least @bytes are free in @ringbuf.
+ *
+ * logical_ring_wait_request() is tried first and any result other than
+ * -ENOSPC is returned unchanged. On -ENOSPC the function calls
+ * intel_logical_ring_advance_and_submit() and then polls: it re-reads the
+ * head with I915_READ_HEAD(), recomputes the space and sleeps 1ms per
+ * iteration, for up to 60 seconds.
+ *
+ * Returns 0 once enough space is free, -ERESTARTSYS when
+ * mm.interruptible is set and a signal is pending, the error from
+ * i915_gem_check_wedge(), or -EBUSY after the timeout.
+ */
+static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
+                                      int bytes)
+{
+       struct intel_engine_cs *ring = ringbuf->ring;
+       struct drm_device *dev = ring->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       unsigned long end;
+       int ret;
+       /* First try waiting on a request that is already queued. */
+       ret = logical_ring_wait_request(ringbuf, bytes);
+       if (ret != -ENOSPC)
+               return ret;
+
+       /* Force the context submission in case we have been skipping it */
+       intel_logical_ring_advance_and_submit(ringbuf);
+
+       /* With GEM the hangcheck timer should kick us out of the loop,
+        * leaving it early runs the risk of corrupting GEM state (due
+        * to running on almost untested codepaths). But on resume
+        * timers don't work yet, so prevent a complete hang in that
+        * case by choosing an insanely large timeout. */
+       end = jiffies + 60 * HZ;
+       /* Poll loop: every exit path sets ret before breaking out. */
+       do {
+               ringbuf->head = I915_READ_HEAD(ring);
+               ringbuf->space = intel_ring_space(ringbuf);
+               if (ringbuf->space >= bytes) {
+                       ret = 0;
+                       break;
+               }
+
+               msleep(1);
+
+               if (dev_priv->mm.interruptible && signal_pending(current)) {
+                       ret = -ERESTARTSYS;
+                       break;
+               }
+
+               ret = i915_gem_check_wedge(&dev_priv->gpu_error,
+                                          dev_priv->mm.interruptible);
+               if (ret)
+                       break;
+
+               if (time_after(jiffies, end)) {
+                       ret = -EBUSY;
+                       break;
+               }
+       } while (1);
+
+       return ret;
+}
+/*
+ * Fill the rest of the buffer, from the current tail up to size, with
+ * MI_NOOP and reset the tail to 0. If fewer bytes than that remainder are
+ * free, logical_ring_wait_for_space() is called for it first.
+ *
+ * Returns 0 on success or the error from logical_ring_wait_for_space().
+ */
+static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf)
+{
+       uint32_t __iomem *virt;
+       int rem = ringbuf->size - ringbuf->tail;
+
+       if (ringbuf->space < rem) {
+               int ret = logical_ring_wait_for_space(ringbuf, rem);
+
+               if (ret)
+                       return ret;
+       }
+       /* rem is in bytes; the NOOPs are written one dword at a time. */
+       virt = ringbuf->virtual_start + ringbuf->tail;
+       rem /= 4;
+       while (rem--)
+               iowrite32(MI_NOOP, virt++);
+
+       ringbuf->tail = 0;
+       ringbuf->space = intel_ring_space(ringbuf);
+
+       return 0;
+}
+/*
+ * Get @ringbuf ready to accept @bytes at the current tail: wrap the
+ * buffer when tail + @bytes would exceed effective_size, then wait for
+ * space when fewer than @bytes are free.
+ *
+ * Returns 0 on success or the first error reported by
+ * logical_ring_wrap_buffer() or logical_ring_wait_for_space().
+ */
+static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
+{
+       int ret;
+
+       if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
+               ret = logical_ring_wrap_buffer(ringbuf);
+               if (unlikely(ret))
+                       return ret;
+       }
+       /* Checked after the wrap, which recomputes ringbuf->space. */
+       if (unlikely(ringbuf->space < bytes)) {
+               ret = logical_ring_wait_for_space(ringbuf, bytes);
+               if (unlikely(ret))
+                       return ret;
+       }
+
+       return 0;
+}
+/*
+ * Reserve room for @num_dwords dwords in @ringbuf.
+ *
+ * In order: calls i915_gem_check_wedge(), makes the space available via
+ * logical_ring_prepare(), preallocates the lazy request/seqno via
+ * logical_ring_alloc_seqno(), and finally subtracts the reservation (in
+ * bytes) from ringbuf->space. The tail itself is not moved here.
+ *
+ * Returns 0 on success or the error from whichever step failed.
+ */
+int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
+{
+       struct intel_engine_cs *ring = ringbuf->ring;
+       struct drm_device *dev = ring->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       int ret;
+
+       ret = i915_gem_check_wedge(&dev_priv->gpu_error,
+                                  dev_priv->mm.interruptible);
+       if (ret)
+               return ret;
+
+       ret = logical_ring_prepare(ringbuf, num_dwords * sizeof(uint32_t));
+       if (ret)
+               return ret;
+
+       /* Preallocate the olr before touching the ring */
+       ret = logical_ring_alloc_seqno(ring);
+       if (ret)
+               return ret;
+
+       ringbuf->space -= num_dwords * sizeof(uint32_t);
+       return 0;
+}
+
 static int gen8_init_common_ring(struct intel_engine_cs *ring)
 {
        struct drm_device *dev = ring->dev;
index bf0eff4e9f088edfae395230f89b5b79979fffef..4e032875c1fdaedfe96188637175e26e05070c68 100644 (file)
@@ -29,6 +29,19 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring);
 void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
 int intel_logical_rings_init(struct drm_device *dev);
 
+void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf);
+static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf)
+{
+       ringbuf->tail &= ringbuf->size - 1;     /* wrap tail into the buffer;
+                                                * assumes size is a power of
+                                                * two - TODO confirm */
+}
+static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf,
+                                          u32 data)
+{
+       iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
+       ringbuf->tail += 4;     /* one dword was written at the old tail;
+                                * no space or wrap check is done in
+                                * this helper */
+}
+int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords);
+
 /* Logical Ring Contexts */
 void intel_lr_context_free(struct intel_context *ctx);
 int intel_lr_context_deferred_create(struct intel_context *ctx,
index dab5e7c790361255c400e4870e69ebbfb2a99511..0bfa018fab200ec81925b340285f2a7950120ac2 100644 (file)
@@ -57,7 +57,7 @@ intel_ring_initialized(struct intel_engine_cs *ring)
                return ring->buffer && ring->buffer->obj;
 }
 
-static inline int __ring_space(int head, int tail, int size)
+int __intel_ring_space(int head, int tail, int size)
 {
        int space = head - (tail + I915_RING_FREE_SPACE);
        if (space < 0)
@@ -65,12 +65,13 @@ static inline int __ring_space(int head, int tail, int size)
        return space;
 }
 
-static inline int ring_space(struct intel_ringbuffer *ringbuf)
+int intel_ring_space(struct intel_ringbuffer *ringbuf)
 {
-       return __ring_space(ringbuf->head & HEAD_ADDR, ringbuf->tail, ringbuf->size);
+       return __intel_ring_space(ringbuf->head & HEAD_ADDR,
+                                 ringbuf->tail, ringbuf->size);
 }
 
-static bool intel_ring_stopped(struct intel_engine_cs *ring)
+bool intel_ring_stopped(struct intel_engine_cs *ring)
 {
        struct drm_i915_private *dev_priv = ring->dev->dev_private;
        return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
@@ -585,7 +586,7 @@ static int init_ring_common(struct intel_engine_cs *ring)
        else {
                ringbuf->head = I915_READ_HEAD(ring);
                ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
-               ringbuf->space = ring_space(ringbuf);
+               ringbuf->space = intel_ring_space(ringbuf);
                ringbuf->last_retired_head = -1;
        }
 
@@ -1702,13 +1703,14 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
                ringbuf->head = ringbuf->last_retired_head;
                ringbuf->last_retired_head = -1;
 
-               ringbuf->space = ring_space(ringbuf);
+               ringbuf->space = intel_ring_space(ringbuf);
                if (ringbuf->space >= n)
                        return 0;
        }
 
        list_for_each_entry(request, &ring->request_list, list) {
-               if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) {
+               if (__intel_ring_space(request->tail, ringbuf->tail,
+                                      ringbuf->size) >= n) {
                        seqno = request->seqno;
                        break;
                }
@@ -1725,7 +1727,7 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
        ringbuf->head = ringbuf->last_retired_head;
        ringbuf->last_retired_head = -1;
 
-       ringbuf->space = ring_space(ringbuf);
+       ringbuf->space = intel_ring_space(ringbuf);
        return 0;
 }
 
@@ -1754,7 +1756,7 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
        trace_i915_ring_wait_begin(ring);
        do {
                ringbuf->head = I915_READ_HEAD(ring);
-               ringbuf->space = ring_space(ringbuf);
+               ringbuf->space = intel_ring_space(ringbuf);
                if (ringbuf->space >= n) {
                        ret = 0;
                        break;
@@ -1806,7 +1808,7 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
                iowrite32(MI_NOOP, virt++);
 
        ringbuf->tail = 0;
-       ringbuf->space = ring_space(ringbuf);
+       ringbuf->space = intel_ring_space(ringbuf);
 
        return 0;
 }
index 677df0d7be48ee11ee2f35a20943b4c901cc04c3..81bad364e36d597f84f60519f03f015456de47a2 100644 (file)
@@ -374,6 +374,9 @@ static inline void intel_ring_advance(struct intel_engine_cs *ring)
        struct intel_ringbuffer *ringbuf = ring->buffer;
        ringbuf->tail &= ringbuf->size - 1;
 }
+int __intel_ring_space(int head, int tail, int size);
+int intel_ring_space(struct intel_ringbuffer *ringbuf);
+bool intel_ring_stopped(struct intel_engine_cs *ring);
 void __intel_ring_advance(struct intel_engine_cs *ring);
 
 int __must_check intel_ring_idle(struct intel_engine_cs *ring);