#define _INTEL_RINGBUFFER_H_
#include <linux/hashtable.h>
+
#include "i915_gem_batch_pool.h"
-#include "i915_gem_request.h"
#include "i915_gem_timeline.h"
+
+#include "i915_pmu.h"
+#include "i915_request.h"
#include "i915_selftest.h"
struct drm_printer;
/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
* do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
*/
-#define gen8_semaphore_seqno_size sizeof(uint64_t)
-#define GEN8_SEMAPHORE_OFFSET(__from, __to) \
- (((__from) * I915_NUM_ENGINES + (__to)) * gen8_semaphore_seqno_size)
-#define GEN8_SIGNAL_OFFSET(__ring, to) \
- (dev_priv->semaphore->node.start + \
- GEN8_SEMAPHORE_OFFSET((__ring)->id, (to)))
-#define GEN8_WAIT_OFFSET(__ring, from) \
- (dev_priv->semaphore->node.start + \
- GEN8_SEMAPHORE_OFFSET(from, (__ring)->id))
-
enum intel_engine_hangcheck_action {
ENGINE_IDLE = 0,
ENGINE_WAIT,
unsigned long action_timestamp;
int deadlock;
struct intel_instdone instdone;
- struct drm_i915_gem_request *active_request;
+ struct i915_request *active_request;
bool stalled;
};
struct i915_vma *vma;
};
-struct drm_i915_gem_request;
-struct intel_render_state;
+struct i915_request;
/*
* Engine IDs definitions.
*/
struct intel_engine_execlists {
/**
- * @irq_tasklet: softirq tasklet for bottom handler
+ * @tasklet: softirq tasklet for bottom handler
*/
- struct tasklet_struct irq_tasklet;
+ struct tasklet_struct tasklet;
/**
* @default_priolist: priority list for I915_PRIORITY_NORMAL
*/
bool no_priolist;
+ /**
+ * @elsp: the ExecList Submission Port register
+ */
+ u32 __iomem *elsp;
+
/**
* @port: execlist port states
*
/**
* @request_count: combined request and submission count
*/
- struct drm_i915_gem_request *request_count;
+ struct i915_request *request_count;
#define EXECLIST_COUNT_BITS 2
#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
unsigned int active;
#define EXECLISTS_ACTIVE_USER 0
#define EXECLISTS_ACTIVE_PREEMPT 1
+#define EXECLISTS_ACTIVE_HWACK 2
/**
* @port_mask: number of execlist ports - 1
*/
unsigned int port_mask;
+ /**
+ * @queue_priority: Highest pending priority.
+ *
+ * When we add requests into the queue, or adjust the priority of
+ * executing requests, we compute the maximum priority of those
+ * pending requests. We can then use this value to determine if
+ * we need to preempt the executing requests to service the queue.
+ */
+ int queue_priority;
+
/**
* @queue: queue of requests, in priority lists
*/
* @csb_use_mmio: access csb through mmio, instead of hwsp
*/
bool csb_use_mmio;
+
+ /**
+ * @preempt_complete_status: expected CSB upon completing preemption
+ */
+ u32 preempt_complete_status;
};
#define INTEL_ENGINE_CS_MAX_NAME 8
struct intel_engine_cs {
struct drm_i915_private *i915;
char name[INTEL_ENGINE_CS_MAX_NAME];
+
enum intel_engine_id id;
- unsigned int uabi_id;
unsigned int hw_id;
unsigned int guc_id;
+ u8 uabi_id;
+ u8 uabi_class;
+
u8 class;
u8 instance;
u32 context_size;
struct intel_ring *buffer;
struct intel_timeline *timeline;
- struct intel_render_state *render_state;
+ struct drm_i915_gem_object *default_state;
atomic_t irq_count;
unsigned long irq_posted;
struct rb_root waiters; /* sorted by retirement, priority */
struct rb_root signals; /* sorted by retirement */
struct task_struct *signaler; /* used for fence signalling */
- struct drm_i915_gem_request __rcu *first_signal;
+ struct i915_request __rcu *first_signal;
struct timer_list fake_irq; /* used after a missed interrupt */
struct timer_list hangcheck; /* detect missed interrupts */
unsigned int hangcheck_interrupts;
+ unsigned int irq_enabled;
bool irq_armed : 1;
- bool irq_enabled : 1;
I915_SELFTEST_DECLARE(bool mock : 1);
} breadcrumbs;
+ struct {
+ /**
+ * @enable: Bitmask of enable sample events on this engine.
+ *
+ * Bits correspond to sample event types, for instance
+ * I915_SAMPLE_QUEUED is bit 0 etc.
+ */
+ u32 enable;
+ /**
+ * @enable_count: Reference count for the enabled samplers.
+ *
+ * Index number corresponds to the bit number from @enable.
+ */
+ unsigned int enable_count[I915_PMU_SAMPLE_BITS];
+ /**
+ * @sample: Counter values for sampling events.
+ *
+ * Our internal timer stores the current counters in this field.
+ */
+#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
+ struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
+ } pmu;
+
/*
* A pool of objects to use as shadow copies of client batch buffers
* when the command parser is enabled. Prevents the client from
int (*init_hw)(struct intel_engine_cs *engine);
void (*reset_hw)(struct intel_engine_cs *engine,
- struct drm_i915_gem_request *req);
+ struct i915_request *rq);
+
+ void (*park)(struct intel_engine_cs *engine);
+ void (*unpark)(struct intel_engine_cs *engine);
void (*set_default_submission)(struct intel_engine_cs *engine);
struct i915_gem_context *ctx);
void (*context_unpin)(struct intel_engine_cs *engine,
struct i915_gem_context *ctx);
- int (*request_alloc)(struct drm_i915_gem_request *req);
- int (*init_context)(struct drm_i915_gem_request *req);
+ int (*request_alloc)(struct i915_request *rq);
+ int (*init_context)(struct i915_request *rq);
- int (*emit_flush)(struct drm_i915_gem_request *request,
- u32 mode);
+ int (*emit_flush)(struct i915_request *request, u32 mode);
#define EMIT_INVALIDATE BIT(0)
#define EMIT_FLUSH BIT(1)
#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH)
- int (*emit_bb_start)(struct drm_i915_gem_request *req,
+ int (*emit_bb_start)(struct i915_request *rq,
u64 offset, u32 length,
unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
#define I915_DISPATCH_RS BIT(2)
- void (*emit_breadcrumb)(struct drm_i915_gem_request *req,
- u32 *cs);
+ void (*emit_breadcrumb)(struct i915_request *rq, u32 *cs);
int emit_breadcrumb_sz;
/* Pass the request to the hardware queue (e.g. directly into
* This is called from an atomic context with irqs disabled; must
* be irq safe.
*/
- void (*submit_request)(struct drm_i915_gem_request *req);
+ void (*submit_request)(struct i915_request *rq);
/* Call when the priority on a request has changed and it and its
* dependencies may need rescheduling. Note the request itself may
*
* Called under the struct_mutex.
*/
- void (*schedule)(struct drm_i915_gem_request *request,
- int priority);
+ void (*schedule)(struct i915_request *request, int priority);
/*
* Cancel all requests on the hardware, or queued for execution.
* ie. transpose of f(x, y)
*/
struct {
- union {
#define GEN6_SEMAPHORE_LAST VECS_HW
#define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1)
#define GEN6_SEMAPHORES_MASK GENMASK(GEN6_SEMAPHORE_LAST, 0)
- struct {
- /* our mbox written by others */
- u32 wait[GEN6_NUM_SEMAPHORES];
- /* mboxes this ring signals to */
- i915_reg_t signal[GEN6_NUM_SEMAPHORES];
- } mbox;
- u64 signal_ggtt[I915_NUM_ENGINES];
- };
+ struct {
+ /* our mbox written by others */
+ u32 wait[GEN6_NUM_SEMAPHORES];
+ /* mboxes this ring signals to */
+ i915_reg_t signal[GEN6_NUM_SEMAPHORES];
+ } mbox;
/* AKA wait() */
- int (*sync_to)(struct drm_i915_gem_request *req,
- struct drm_i915_gem_request *signal);
- u32 *(*signal)(struct drm_i915_gem_request *req, u32 *cs);
+ int (*sync_to)(struct i915_request *rq,
+ struct i915_request *signal);
+ u32 *(*signal)(struct i915_request *rq, u32 *cs);
} semaphore;
struct intel_engine_execlists execlists;
* stream (ring).
*/
struct i915_gem_context *legacy_active_context;
+ struct i915_hw_ppgtt *legacy_active_ppgtt;
/* status_notifier: list of callbacks for context-switch changes */
struct atomic_notifier_head context_status_notifier;
struct intel_engine_hangcheck hangcheck;
- bool needs_cmd_parser;
+#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
+#define I915_ENGINE_SUPPORTS_STATS BIT(1)
+ unsigned int flags;
/*
* Table of commands the command parser needs to know about
* certain bits to encode the command length in the header).
*/
u32 (*get_cmd_length_mask)(u32 cmd_header);
+
+ struct {
+ /**
+ * @lock: Lock protecting the below fields.
+ */
+ spinlock_t lock;
+ /**
+ * @enabled: Reference count indicating number of listeners.
+ */
+ unsigned int enabled;
+ /**
+ * @active: Number of contexts currently scheduled in.
+ */
+ unsigned int active;
+ /**
+ * @enabled_at: Timestamp when busy stats were enabled.
+ */
+ ktime_t enabled_at;
+ /**
+ * @start: Timestamp of the last idle to active transition.
+ *
+ * Idle is defined as active == 0, active is active > 0.
+ */
+ ktime_t start;
+ /**
+ * @total: Total time this engine was busy.
+ *
+ * Accumulated time not counting the most recent block in cases
+ * where engine is currently busy (active > 0).
+ */
+ ktime_t total;
+ } stats;
};
+static inline bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine)
+{
+ return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
+}
+
+static inline bool intel_engine_supports_stats(struct intel_engine_cs *engine)
+{
+ return engine->flags & I915_ENGINE_SUPPORTS_STATS;
+}
+
static inline void
execlists_set_active(struct intel_engine_execlists *execlists,
unsigned int bit)
return test_bit(bit, (unsigned long *)&execlists->active);
}
+void
+execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);
+
+void
+execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
+
static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
{
}
static inline u32
-intel_read_status_page(struct intel_engine_cs *engine, int reg)
+intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
/* Ensure that the compiler doesn't optimize away the load. */
return READ_ONCE(engine->status_page.page_addr[reg]);
*/
#define I915_GEM_HWS_INDEX 0x30
#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
+#define I915_GEM_HWS_PREEMPT_INDEX 0x32
+#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_SCRATCH_INDEX 0x40
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
-int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
+int __must_check intel_ring_cacheline_align(struct i915_request *rq);
-u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req,
- unsigned int n);
+int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes);
+u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
-static inline void
-intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
+static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
{
/* Dummy function.
*
* reserved for the command packet (i.e. the value passed to
* intel_ring_begin()).
*/
- GEM_BUG_ON((req->ring->vaddr + req->ring->emit) != cs);
+ GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
}
-static inline u32
-intel_ring_wrap(const struct intel_ring *ring, u32 pos)
+static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
{
return pos & (ring->size - 1);
}
-static inline u32
-intel_ring_offset(const struct drm_i915_gem_request *req, void *addr)
+static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
{
/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
- u32 offset = addr - req->ring->vaddr;
- GEM_BUG_ON(offset > req->ring->size);
- return intel_ring_wrap(req->ring, offset);
+ u32 offset = addr - rq->ring->vaddr;
+ GEM_BUG_ON(offset > rq->ring->size);
+ return intel_ring_wrap(rq->ring, offset);
}
static inline void
{
/* Whilst writes to the tail are strictly order, there is no
* serialisation between readers and the writers. The tail may be
- * read by i915_gem_request_retire() just as it is being updated
+ * read by i915_request_retire() just as it is being updated
* by execlists, as although the breadcrumb is complete, the context
* switch hasn't been seen.
*/
int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);
-u64 intel_engine_get_active_head(struct intel_engine_cs *engine);
-u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine);
+u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
+u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);
static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
{
}
int init_workarounds_ring(struct intel_engine_cs *engine);
-int intel_ring_workarounds_emit(struct drm_i915_gem_request *req);
+int intel_ring_workarounds_emit(struct i915_request *rq);
void intel_engine_get_instdone(struct intel_engine_cs *engine,
struct intel_instdone *instdone);
return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
}
+static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
+{
+ return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
+}
+
/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
static inline void intel_wait_init(struct intel_wait *wait,
- struct drm_i915_gem_request *rq)
+ struct i915_request *rq)
{
wait->tsk = current;
wait->request = rq;
static inline bool
intel_wait_update_request(struct intel_wait *wait,
- const struct drm_i915_gem_request *rq)
+ const struct i915_request *rq)
{
- return intel_wait_update_seqno(wait, i915_gem_request_global_seqno(rq));
+ return intel_wait_update_seqno(wait, i915_request_global_seqno(rq));
}
static inline bool
static inline bool
intel_wait_check_request(const struct intel_wait *wait,
- const struct drm_i915_gem_request *rq)
+ const struct i915_request *rq)
{
- return intel_wait_check_seqno(wait, i915_gem_request_global_seqno(rq));
+ return intel_wait_check_seqno(wait, i915_request_global_seqno(rq));
}
static inline bool intel_wait_complete(const struct intel_wait *wait)
struct intel_wait *wait);
void intel_engine_remove_wait(struct intel_engine_cs *engine,
struct intel_wait *wait);
-void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
- bool wakeup);
-void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);
+void intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
+void intel_engine_cancel_signaling(struct i915_request *request);
static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
{
#define ENGINE_WAKEUP_WAITER BIT(0)
#define ENGINE_WAKEUP_ASLEEP BIT(1)
+void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
+void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);
+
void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
return batch + 6;
}
+static inline u32 *
+gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset)
+{
+ /* We're using qword write, offset should be aligned to 8 bytes. */
+ GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
+
+ /* w/a for post sync ops following a GPGPU operation we
+ * need a prior CS_STALL, which is emitted by the flush
+ * following the batch.
+ */
+ *cs++ = GFX_OP_PIPE_CONTROL(6);
+ *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_QW_WRITE;
+ *cs++ = gtt_offset;
+ *cs++ = 0;
+ *cs++ = value;
+ /* We're thrashing one dword of HWS. */
+ *cs++ = 0;
+
+ return cs;
+}
+
+static inline u32 *
+gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
+{
+ /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
+ GEM_BUG_ON(gtt_offset & (1 << 5));
+ /* Offset should be aligned to 8 bytes for both (QW/DW) write types */
+ GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
+
+ *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+ *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
+ *cs++ = 0;
+ *cs++ = value;
+
+ return cs;
+}
+
bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
-void intel_engines_mark_idle(struct drm_i915_private *i915);
+bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);
+
+void intel_engines_park(struct drm_i915_private *i915);
+void intel_engines_unpark(struct drm_i915_private *i915);
+
void intel_engines_reset_default_submission(struct drm_i915_private *i915);
+unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
bool intel_engine_can_store_dword(struct intel_engine_cs *engine);
-void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *p);
+__printf(3, 4)
+void intel_engine_dump(struct intel_engine_cs *engine,
+ struct drm_printer *m,
+ const char *header, ...);
+
+struct intel_engine_cs *
+intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
+
+static inline void intel_engine_context_in(struct intel_engine_cs *engine)
+{
+ unsigned long flags;
+
+ if (READ_ONCE(engine->stats.enabled) == 0)
+ return;
+
+ spin_lock_irqsave(&engine->stats.lock, flags);
+
+ if (engine->stats.enabled > 0) {
+ if (engine->stats.active++ == 0)
+ engine->stats.start = ktime_get();
+ GEM_BUG_ON(engine->stats.active == 0);
+ }
+
+ spin_unlock_irqrestore(&engine->stats.lock, flags);
+}
+
+static inline void intel_engine_context_out(struct intel_engine_cs *engine)
+{
+ unsigned long flags;
+
+ if (READ_ONCE(engine->stats.enabled) == 0)
+ return;
+
+ spin_lock_irqsave(&engine->stats.lock, flags);
+
+ if (engine->stats.enabled > 0) {
+ ktime_t last;
+
+ if (engine->stats.active && --engine->stats.active == 0) {
+ /*
+ * Decrement the active context count and in case GPU
+ * is now idle add up to the running total.
+ */
+ last = ktime_sub(ktime_get(), engine->stats.start);
+
+ engine->stats.total = ktime_add(engine->stats.total,
+ last);
+ } else if (engine->stats.active == 0) {
+ /*
+ * After turning on engine stats, context out might be
+ * the first event in which case we account from the
+ * time stats gathering was turned on.
+ */
+ last = ktime_sub(ktime_get(), engine->stats.enabled_at);
+
+ engine->stats.total = ktime_add(engine->stats.total,
+ last);
+ }
+ }
+
+ spin_unlock_irqrestore(&engine->stats.lock, flags);
+}
+
+int intel_enable_engine_stats(struct intel_engine_cs *engine);
+void intel_disable_engine_stats(struct intel_engine_cs *engine);
+
+ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
#endif /* _INTEL_RINGBUFFER_H_ */