Merge drm-next into drm-intel-next-queued
drivers/gpu/drm/i915/intel_ringbuffer.h
index 2863d5a65187a970eaa5c8872e66d53bc28eed9f..bbacf4d0f4cb56a6a1c74cf57e22098b4ad49f6d 100644
@@ -3,9 +3,12 @@
 #define _INTEL_RINGBUFFER_H_
 
 #include <linux/hashtable.h>
+
 #include "i915_gem_batch_pool.h"
-#include "i915_gem_request.h"
 #include "i915_gem_timeline.h"
+
+#include "i915_pmu.h"
+#include "i915_request.h"
 #include "i915_selftest.h"
 
 struct drm_printer;
@@ -47,16 +50,6 @@ struct intel_hw_status_page {
 /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
  * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
  */
-#define gen8_semaphore_seqno_size sizeof(uint64_t)
-#define GEN8_SEMAPHORE_OFFSET(__from, __to)                         \
-       (((__from) * I915_NUM_ENGINES  + (__to)) * gen8_semaphore_seqno_size)
-#define GEN8_SIGNAL_OFFSET(__ring, to)                      \
-       (dev_priv->semaphore->node.start + \
-        GEN8_SEMAPHORE_OFFSET((__ring)->id, (to)))
-#define GEN8_WAIT_OFFSET(__ring, from)                      \
-       (dev_priv->semaphore->node.start + \
-        GEN8_SEMAPHORE_OFFSET(from, (__ring)->id))
-
 enum intel_engine_hangcheck_action {
        ENGINE_IDLE = 0,
        ENGINE_WAIT,
@@ -124,7 +117,7 @@ struct intel_engine_hangcheck {
        unsigned long action_timestamp;
        int deadlock;
        struct intel_instdone instdone;
-       struct drm_i915_gem_request *active_request;
+       struct i915_request *active_request;
        bool stalled;
 };
 
@@ -165,8 +158,7 @@ struct i915_ctx_workarounds {
        struct i915_vma *vma;
 };
 
-struct drm_i915_gem_request;
-struct intel_render_state;
+struct i915_request;
 
 /*
  * Engine IDs definitions.
@@ -195,9 +187,9 @@ struct i915_priolist {
  */
 struct intel_engine_execlists {
        /**
-        * @irq_tasklet: softirq tasklet for bottom handler
+        * @tasklet: softirq tasklet for bottom handler
         */
-       struct tasklet_struct irq_tasklet;
+       struct tasklet_struct tasklet;
 
        /**
         * @default_priolist: priority list for I915_PRIORITY_NORMAL
@@ -209,6 +201,11 @@ struct intel_engine_execlists {
         */
        bool no_priolist;
 
+       /**
+        * @elsp: the ExecList Submission Port register
+        */
+       u32 __iomem *elsp;
+
        /**
         * @port: execlist port states
         *
@@ -223,7 +220,7 @@ struct intel_engine_execlists {
                /**
                 * @request_count: combined request and submission count
                 */
-               struct drm_i915_gem_request *request_count;
+               struct i915_request *request_count;
 #define EXECLIST_COUNT_BITS 2
 #define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
 #define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
@@ -253,12 +250,23 @@ struct intel_engine_execlists {
        unsigned int active;
 #define EXECLISTS_ACTIVE_USER 0
 #define EXECLISTS_ACTIVE_PREEMPT 1
+#define EXECLISTS_ACTIVE_HWACK 2
 
        /**
         * @port_mask: number of execlist ports - 1
         */
        unsigned int port_mask;
 
+       /**
+        * @queue_priority: Highest pending priority.
+        *
+        * When we add requests into the queue, or adjust the priority of
+        * executing requests, we compute the maximum priority of those
+        * pending requests. We can then use this value to determine if
+        * we need to preempt the executing requests to service the queue.
+        */
+       int queue_priority;
+
        /**
         * @queue: queue of requests, in priority lists
         */
@@ -283,6 +291,11 @@ struct intel_engine_execlists {
         * @csb_use_mmio: access csb through mmio, instead of hwsp
         */
        bool csb_use_mmio;
+
+       /**
+        * @preempt_complete_status: expected CSB upon completing preemption
+        */
+       u32 preempt_complete_status;
 };
 
 #define INTEL_ENGINE_CS_MAX_NAME 8
@@ -290,11 +303,14 @@ struct intel_engine_execlists {
 struct intel_engine_cs {
        struct drm_i915_private *i915;
        char name[INTEL_ENGINE_CS_MAX_NAME];
+
        enum intel_engine_id id;
-       unsigned int uabi_id;
        unsigned int hw_id;
        unsigned int guc_id;
 
+       u8 uabi_id;
+       u8 uabi_class;
+
        u8 class;
        u8 instance;
        u32 context_size;
@@ -304,7 +320,7 @@ struct intel_engine_cs {
        struct intel_ring *buffer;
        struct intel_timeline *timeline;
 
-       struct intel_render_state *render_state;
+       struct drm_i915_gem_object *default_state;
 
        atomic_t irq_count;
        unsigned long irq_posted;
@@ -335,17 +351,40 @@ struct intel_engine_cs {
                struct rb_root waiters; /* sorted by retirement, priority */
                struct rb_root signals; /* sorted by retirement */
                struct task_struct *signaler; /* used for fence signalling */
-               struct drm_i915_gem_request __rcu *first_signal;
+               struct i915_request __rcu *first_signal;
                struct timer_list fake_irq; /* used after a missed interrupt */
                struct timer_list hangcheck; /* detect missed interrupts */
 
                unsigned int hangcheck_interrupts;
+               unsigned int irq_enabled;
 
                bool irq_armed : 1;
-               bool irq_enabled : 1;
                I915_SELFTEST_DECLARE(bool mock : 1);
        } breadcrumbs;
 
+       struct {
+               /**
+                * @enable: Bitmask of enabled sample events on this engine.
+                *
+                * Bits correspond to sample event types, for instance
+                * I915_SAMPLE_QUEUED is bit 0 etc.
+                */
+               u32 enable;
+               /**
+                * @enable_count: Reference count for the enabled samplers.
+                *
+                * Index number corresponds to the bit number from @enable.
+                */
+               unsigned int enable_count[I915_PMU_SAMPLE_BITS];
+               /**
+                * @sample: Counter values for sampling events.
+                *
+                * Our internal timer stores the current counters in this field.
+                */
+#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
+               struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
+       } pmu;
+
        /*
         * A pool of objects to use as shadow copies of client batch buffers
         * when the command parser is enabled. Prevents the client from
@@ -364,7 +403,10 @@ struct intel_engine_cs {
 
        int             (*init_hw)(struct intel_engine_cs *engine);
        void            (*reset_hw)(struct intel_engine_cs *engine,
-                                   struct drm_i915_gem_request *req);
+                                   struct i915_request *rq);
+
+       void            (*park)(struct intel_engine_cs *engine);
+       void            (*unpark)(struct intel_engine_cs *engine);
 
        void            (*set_default_submission)(struct intel_engine_cs *engine);
 
@@ -372,22 +414,20 @@ struct intel_engine_cs {
                                          struct i915_gem_context *ctx);
        void            (*context_unpin)(struct intel_engine_cs *engine,
                                         struct i915_gem_context *ctx);
-       int             (*request_alloc)(struct drm_i915_gem_request *req);
-       int             (*init_context)(struct drm_i915_gem_request *req);
+       int             (*request_alloc)(struct i915_request *rq);
+       int             (*init_context)(struct i915_request *rq);
 
-       int             (*emit_flush)(struct drm_i915_gem_request *request,
-                                     u32 mode);
+       int             (*emit_flush)(struct i915_request *request, u32 mode);
 #define EMIT_INVALIDATE        BIT(0)
 #define EMIT_FLUSH     BIT(1)
 #define EMIT_BARRIER   (EMIT_INVALIDATE | EMIT_FLUSH)
-       int             (*emit_bb_start)(struct drm_i915_gem_request *req,
+       int             (*emit_bb_start)(struct i915_request *rq,
                                         u64 offset, u32 length,
                                         unsigned int dispatch_flags);
 #define I915_DISPATCH_SECURE BIT(0)
 #define I915_DISPATCH_PINNED BIT(1)
 #define I915_DISPATCH_RS     BIT(2)
-       void            (*emit_breadcrumb)(struct drm_i915_gem_request *req,
-                                          u32 *cs);
+       void            (*emit_breadcrumb)(struct i915_request *rq, u32 *cs);
        int             emit_breadcrumb_sz;
 
        /* Pass the request to the hardware queue (e.g. directly into
@@ -396,7 +436,7 @@ struct intel_engine_cs {
         * This is called from an atomic context with irqs disabled; must
         * be irq safe.
         */
-       void            (*submit_request)(struct drm_i915_gem_request *req);
+       void            (*submit_request)(struct i915_request *rq);
 
        /* Call when the priority on a request has changed and it and its
         * dependencies may need rescheduling. Note the request itself may
@@ -404,8 +444,7 @@ struct intel_engine_cs {
         *
         * Called under the struct_mutex.
         */
-       void            (*schedule)(struct drm_i915_gem_request *request,
-                                   int priority);
+       void            (*schedule)(struct i915_request *request, int priority);
 
        /*
         * Cancel all requests on the hardware, or queued for execution.
@@ -462,23 +501,20 @@ struct intel_engine_cs {
         *  ie. transpose of f(x, y)
         */
        struct {
-               union {
 #define GEN6_SEMAPHORE_LAST    VECS_HW
 #define GEN6_NUM_SEMAPHORES    (GEN6_SEMAPHORE_LAST + 1)
 #define GEN6_SEMAPHORES_MASK   GENMASK(GEN6_SEMAPHORE_LAST, 0)
-                       struct {
-                               /* our mbox written by others */
-                               u32             wait[GEN6_NUM_SEMAPHORES];
-                               /* mboxes this ring signals to */
-                               i915_reg_t      signal[GEN6_NUM_SEMAPHORES];
-                       } mbox;
-                       u64             signal_ggtt[I915_NUM_ENGINES];
-               };
+               struct {
+                       /* our mbox written by others */
+                       u32             wait[GEN6_NUM_SEMAPHORES];
+                       /* mboxes this ring signals to */
+                       i915_reg_t      signal[GEN6_NUM_SEMAPHORES];
+               } mbox;
 
                /* AKA wait() */
-               int     (*sync_to)(struct drm_i915_gem_request *req,
-                                  struct drm_i915_gem_request *signal);
-               u32     *(*signal)(struct drm_i915_gem_request *req, u32 *cs);
+               int     (*sync_to)(struct i915_request *rq,
+                                  struct i915_request *signal);
+               u32     *(*signal)(struct i915_request *rq, u32 *cs);
        } semaphore;
 
        struct intel_engine_execlists execlists;
@@ -501,13 +537,16 @@ struct intel_engine_cs {
         * stream (ring).
         */
        struct i915_gem_context *legacy_active_context;
+       struct i915_hw_ppgtt *legacy_active_ppgtt;
 
        /* status_notifier: list of callbacks for context-switch changes */
        struct atomic_notifier_head context_status_notifier;
 
        struct intel_engine_hangcheck hangcheck;
 
-       bool needs_cmd_parser;
+#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
+#define I915_ENGINE_SUPPORTS_STATS   BIT(1)
+       unsigned int flags;
 
        /*
         * Table of commands the command parser needs to know about
@@ -532,8 +571,50 @@ struct intel_engine_cs {
         * certain bits to encode the command length in the header).
         */
        u32 (*get_cmd_length_mask)(u32 cmd_header);
+
+       struct {
+               /**
+                * @lock: Lock protecting the below fields.
+                */
+               spinlock_t lock;
+               /**
+                * @enabled: Reference count indicating number of listeners.
+                */
+               unsigned int enabled;
+               /**
+                * @active: Number of contexts currently scheduled in.
+                */
+               unsigned int active;
+               /**
+                * @enabled_at: Timestamp when busy stats were enabled.
+                */
+               ktime_t enabled_at;
+               /**
+                * @start: Timestamp of the last idle to active transition.
+                *
+                * Idle is defined as active == 0, and active as active > 0.
+                */
+               ktime_t start;
+               /**
+                * @total: Total time this engine was busy.
+                *
+                * Accumulated time, not counting the most recent busy period
+                * in cases where the engine is currently busy (active > 0).
+                */
+               ktime_t total;
+       } stats;
 };
 
+static inline bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine)
+{
+       return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
+}
+
+static inline bool intel_engine_supports_stats(struct intel_engine_cs *engine)
+{
+       return engine->flags & I915_ENGINE_SUPPORTS_STATS;
+}
+
 static inline void
 execlists_set_active(struct intel_engine_execlists *execlists,
                     unsigned int bit)
@@ -555,6 +636,12 @@ execlists_is_active(const struct intel_engine_execlists *execlists,
        return test_bit(bit, (unsigned long *)&execlists->active);
 }
 
+void
+execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);
+
+void
+execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
+
 static inline unsigned int
 execlists_num_ports(const struct intel_engine_execlists * const execlists)
 {
@@ -581,7 +668,7 @@ intel_engine_flag(const struct intel_engine_cs *engine)
 }
 
 static inline u32
-intel_read_status_page(struct intel_engine_cs *engine, int reg)
+intel_read_status_page(const struct intel_engine_cs *engine, int reg)
 {
        /* Ensure that the compiler doesn't optimize away the load. */
        return READ_ONCE(engine->status_page.page_addr[reg]);
@@ -624,6 +711,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
  */
 #define I915_GEM_HWS_INDEX             0x30
 #define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
+#define I915_GEM_HWS_PREEMPT_INDEX     0x32
+#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 #define I915_GEM_HWS_SCRATCH_INDEX     0x40
 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 
@@ -646,13 +735,12 @@ void intel_engine_cleanup(struct intel_engine_cs *engine);
 
 void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
 
-int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
+int __must_check intel_ring_cacheline_align(struct i915_request *rq);
 
-u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req,
-                                  unsigned int n);
+int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes);
+u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
 
-static inline void
-intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
+static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
 {
        /* Dummy function.
         *
@@ -662,22 +750,20 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
         * reserved for the command packet (i.e. the value passed to
         * intel_ring_begin()).
         */
-       GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
+       GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
 }
 
-static inline u32
-intel_ring_wrap(const struct intel_ring *ring, u32 pos)
+static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
 {
        return pos & (ring->size - 1);
 }
 
-static inline u32
-intel_ring_offset(const struct drm_i915_gem_request *req, void *addr)
+static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
 {
        /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
-       u32 offset = addr - req->ring->vaddr;
-       GEM_BUG_ON(offset > req->ring->size);
-       return intel_ring_wrap(req->ring, offset);
+       u32 offset = addr - rq->ring->vaddr;
+       GEM_BUG_ON(offset > rq->ring->size);
+       return intel_ring_wrap(rq->ring, offset);
 }
 
 static inline void
@@ -715,7 +801,7 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
 {
        /* Whilst writes to the tail are strictly ordered, there is no
         * serialisation between readers and the writers. The tail may be
-        * read by i915_gem_request_retire() just as it is being updated
+        * read by i915_request_retire() just as it is being updated
         * by execlists, as although the breadcrumb is complete, the context
         * switch hasn't been seen.
         */
@@ -736,8 +822,8 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
 int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);
 
-u64 intel_engine_get_active_head(struct intel_engine_cs *engine);
-u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine);
+u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
+u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);
 
 static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
 {
@@ -757,7 +843,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
 }
 
 int init_workarounds_ring(struct intel_engine_cs *engine);
-int intel_ring_workarounds_emit(struct drm_i915_gem_request *req);
+int intel_ring_workarounds_emit(struct i915_request *rq);
 
 void intel_engine_get_instdone(struct intel_engine_cs *engine,
                               struct intel_instdone *instdone);
@@ -776,11 +862,16 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
        return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
 }
 
+static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
+{
+       return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
+}
+
 /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
 
 static inline void intel_wait_init(struct intel_wait *wait,
-                                  struct drm_i915_gem_request *rq)
+                                  struct i915_request *rq)
 {
        wait->tsk = current;
        wait->request = rq;
@@ -806,9 +897,9 @@ intel_wait_update_seqno(struct intel_wait *wait, u32 seqno)
 
 static inline bool
 intel_wait_update_request(struct intel_wait *wait,
-                         const struct drm_i915_gem_request *rq)
+                         const struct i915_request *rq)
 {
-       return intel_wait_update_seqno(wait, i915_gem_request_global_seqno(rq));
+       return intel_wait_update_seqno(wait, i915_request_global_seqno(rq));
 }
 
 static inline bool
@@ -819,9 +910,9 @@ intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno)
 
 static inline bool
 intel_wait_check_request(const struct intel_wait *wait,
-                        const struct drm_i915_gem_request *rq)
+                        const struct i915_request *rq)
 {
-       return intel_wait_check_seqno(wait, i915_gem_request_global_seqno(rq));
+       return intel_wait_check_seqno(wait, i915_request_global_seqno(rq));
 }
 
 static inline bool intel_wait_complete(const struct intel_wait *wait)
@@ -833,9 +924,8 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
                           struct intel_wait *wait);
 void intel_engine_remove_wait(struct intel_engine_cs *engine,
                              struct intel_wait *wait);
-void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
-                                  bool wakeup);
-void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);
+void intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
+void intel_engine_cancel_signaling(struct i915_request *request);
 
 static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
 {
@@ -846,6 +936,9 @@ unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
 #define ENGINE_WAKEUP_WAITER BIT(0)
 #define ENGINE_WAKEUP_ASLEEP BIT(1)
 
+void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
+void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);
+
 void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
 
@@ -864,14 +957,123 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
        return batch + 6;
 }
 
+static inline u32 *
+gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset)
+{
+       /* We're using qword write, offset should be aligned to 8 bytes. */
+       GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
+
+       /* w/a: for post-sync ops following a GPGPU operation we
+        * need a prior CS_STALL, which is emitted by the flush
+        * following the batch.
+        */
+       *cs++ = GFX_OP_PIPE_CONTROL(6);
+       *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
+               PIPE_CONTROL_QW_WRITE;
+       *cs++ = gtt_offset;
+       *cs++ = 0;
+       *cs++ = value;
+       /* We're thrashing one dword of HWS. */
+       *cs++ = 0;
+
+       return cs;
+}
+
+static inline u32 *
+gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
+{
+       /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
+       GEM_BUG_ON(gtt_offset & (1 << 5));
+       /* Offset should be aligned to 8 bytes for both (QW/DW) write types */
+       GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
+
+       *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+       *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
+       *cs++ = 0;
+       *cs++ = value;
+
+       return cs;
+}
+
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
-void intel_engines_mark_idle(struct drm_i915_private *i915);
+bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);
+
+void intel_engines_park(struct drm_i915_private *i915);
+void intel_engines_unpark(struct drm_i915_private *i915);
+
 void intel_engines_reset_default_submission(struct drm_i915_private *i915);
+unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
 
 bool intel_engine_can_store_dword(struct intel_engine_cs *engine);
 
-void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *p);
+__printf(3, 4)
+void intel_engine_dump(struct intel_engine_cs *engine,
+                      struct drm_printer *m,
+                      const char *header, ...);
+
+struct intel_engine_cs *
+intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
+
+static inline void intel_engine_context_in(struct intel_engine_cs *engine)
+{
+       unsigned long flags;
+
+       if (READ_ONCE(engine->stats.enabled) == 0)
+               return;
+
+       spin_lock_irqsave(&engine->stats.lock, flags);
+
+       if (engine->stats.enabled > 0) {
+               if (engine->stats.active++ == 0)
+                       engine->stats.start = ktime_get();
+               GEM_BUG_ON(engine->stats.active == 0);
+       }
+
+       spin_unlock_irqrestore(&engine->stats.lock, flags);
+}
+
+static inline void intel_engine_context_out(struct intel_engine_cs *engine)
+{
+       unsigned long flags;
+
+       if (READ_ONCE(engine->stats.enabled) == 0)
+               return;
+
+       spin_lock_irqsave(&engine->stats.lock, flags);
+
+       if (engine->stats.enabled > 0) {
+               ktime_t last;
+
+               if (engine->stats.active && --engine->stats.active == 0) {
+                       /*
+                        * The active context count has dropped to zero: the
+                        * engine is now idle, so add the elapsed busy period
+                        * to the running total.
+                        */
+                       last = ktime_sub(ktime_get(), engine->stats.start);
+
+                       engine->stats.total = ktime_add(engine->stats.total,
+                                                       last);
+               } else if (engine->stats.active == 0) {
+                       /*
+                        * the first event, in which case we account from the
+                        * time stats gathering was turned on.
+                        * time stats gathering was turned on.
+                        */
+                       last = ktime_sub(ktime_get(), engine->stats.enabled_at);
+
+                       engine->stats.total = ktime_add(engine->stats.total,
+                                                       last);
+               }
+       }
+
+       spin_unlock_irqrestore(&engine->stats.lock, flags);
+}
+
+int intel_enable_engine_stats(struct intel_engine_cs *engine);
+void intel_disable_engine_stats(struct intel_engine_cs *engine);
+
+ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
 
 #endif /* _INTEL_RINGBUFFER_H_ */