if (++i915_load_fail_count == i915_modparams.inject_load_failure) {
DRM_INFO("Injecting failure at checkpoint %u [%s:%d]\n",
i915_modparams.inject_load_failure, func, line);
+ i915_modparams.inject_load_failure = 0;
return true;
}
static bool i915_error_injected(struct drm_i915_private *dev_priv)
{
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
- return i915_modparams.inject_load_failure &&
- i915_load_fail_count == i915_modparams.inject_load_failure;
+ return i915_load_fail_count && !i915_modparams.inject_load_failure;
#else
return false;
#endif
}
- #define i915_load_error(dev_priv, fmt, ...) \
- __i915_printk(dev_priv, \
- i915_error_injected(dev_priv) ? KERN_DEBUG : KERN_ERR, \
+ #define i915_load_error(i915, fmt, ...) \
+ __i915_printk(i915, \
+ i915_error_injected(i915) ? KERN_DEBUG : KERN_ERR, \
fmt, ##__VA_ARGS__)
/* Map PCH device id to PCH type, or PCH_NONE if unknown. */
id = INTEL_PCH_SPT_DEVICE_ID_TYPE;
else if (IS_COFFEELAKE(dev_priv) || IS_CANNONLAKE(dev_priv))
id = INTEL_PCH_CNP_DEVICE_ID_TYPE;
+ else if (IS_ICELAKE(dev_priv))
+ id = INTEL_PCH_ICP_DEVICE_ID_TYPE;
if (id)
DRM_DEBUG_KMS("Assuming PCH ID %04x\n", id);
.can_switch = i915_switcheroo_can_switch,
};
- static void i915_gem_fini(struct drm_i915_private *dev_priv)
- {
- /* Flush any outstanding unpin_work. */
- i915_gem_drain_workqueue(dev_priv);
-
- mutex_lock(&dev_priv->drm.struct_mutex);
- intel_uc_fini_hw(dev_priv);
- intel_uc_fini(dev_priv);
- i915_gem_cleanup_engines(dev_priv);
- i915_gem_contexts_fini(dev_priv);
- mutex_unlock(&dev_priv->drm.struct_mutex);
-
- intel_uc_fini_misc(dev_priv);
- i915_gem_cleanup_userptr(dev_priv);
-
- i915_gem_drain_freed_objects(dev_priv);
-
- WARN_ON(!list_empty(&dev_priv->contexts.list));
- }
-
static int i915_load_modeset_init(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
return false;
}
+ static int i915_drm_prepare(struct drm_device *dev)
+ {
+ struct drm_i915_private *i915 = to_i915(dev);
+ int err;
+
+ /*
+ * NB intel_display_suspend() may issue new requests after we've
+ * ostensibly marked the GPU as ready-to-sleep here. We need to
+ * split out that work and pull it forward so that after point,
+ * the GPU is not woken again.
+ */
+ err = i915_gem_suspend(i915);
+ if (err)
+ dev_err(&i915->drm.pdev->dev,
+ "GEM idle failed, suspend/resume might fail\n");
+
+ return err;
+ }
+
static int i915_drm_suspend(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct pci_dev *pdev = dev_priv->drm.pdev;
pci_power_t opregion_target_state;
- int error;
/* ignore lid events during suspend */
mutex_lock(&dev_priv->modeset_restore_lock);
pci_save_state(pdev);
- error = i915_gem_suspend(dev_priv);
- if (error) {
- dev_err(&pdev->dev,
- "GEM idle failed, resume might fail\n");
- goto out;
- }
-
intel_display_suspend(dev);
intel_dp_mst_suspend(dev);
opregion_target_state = suspend_to_idle(dev_priv) ? PCI_D1 : PCI_D3cold;
intel_opregion_notify_adapter(dev_priv, opregion_target_state);
- intel_uncore_suspend(dev_priv);
intel_opregion_unregister(dev_priv);
intel_fbdev_set_suspend(dev, FBINFO_STATE_SUSPENDED, true);
intel_csr_ucode_suspend(dev_priv);
- out:
enable_rpm_wakeref_asserts(dev_priv);
- return error;
+ return 0;
}
static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
disable_rpm_wakeref_asserts(dev_priv);
+ i915_gem_suspend_late(dev_priv);
+
intel_display_set_init_power(dev_priv, false);
+ intel_uncore_suspend(dev_priv);
/*
* In case of firmware assisted context save/restore don't manually
return ret;
}
+ static int i915_pm_prepare(struct device *kdev)
+ {
+ struct pci_dev *pdev = to_pci_dev(kdev);
+ struct drm_device *dev = pci_get_drvdata(pdev);
+
+ if (!dev) {
+ dev_err(kdev, "DRM not initialized, aborting suspend.\n");
+ return -ENODEV;
+ }
+
+ if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+ return 0;
+
+ return i915_drm_prepare(dev);
+ }
+
static int i915_pm_suspend(struct device *kdev)
{
struct pci_dev *pdev = to_pci_dev(kdev);
* S0ix (via system suspend) and S3 event handlers [PMSG_SUSPEND,
* PMSG_RESUME]
*/
+ .prepare = i915_pm_prepare,
.suspend = i915_pm_suspend,
.suspend_late = i915_pm_suspend_late,
.resume_early = i915_pm_resume_early,
DRM_IOCTL_DEF_DRV(I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id_ioctl, 0),
DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_RENDER_ALLOW),
- DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
- DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
- DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
- DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW),
+ DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER),
DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_RENDER_ALLOW),
#define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS)
static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine);
+ struct intel_engine_cs *engine,
+ struct intel_context *ce);
static void execlists_init_reg_state(u32 *reg_state,
struct i915_gem_context *ctx,
struct intel_engine_cs *engine,
!i915_request_completed(last));
}
- /**
- * intel_lr_context_descriptor_update() - calculate & cache the descriptor
- * descriptor for a pinned context
- * @ctx: Context to work on
- * @engine: Engine the descriptor will be used with
- *
+ /*
* The context descriptor encodes various attributes of a context,
* including its GTT address and some flags. Because it's fairly
* expensive to calculate, we'll just do it once and cache the result,
*
* bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template)
* bits 12-31: LRCA, GTT address of (the HWSP of) this context
- * bits 32-52: ctx ID, a globally unique tag
+ * bits 32-52: ctx ID, a globally unique tag (highest bit used by GuC)
* bits 53-54: mbz, reserved for use by hardware
* bits 55-63: group ID, currently unused and set to 0
*
*/
static void
intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
+ struct intel_engine_cs *engine,
+ struct intel_context *ce)
{
- struct intel_context *ce = to_intel_context(ctx, engine);
u64 desc;
BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
/* bits 12-31 */
GEM_BUG_ON(desc & GENMASK_ULL(63, 32));
+ /*
+ * The following 32bits are copied into the OA reports (dword 2).
+ * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
+ * anything below.
+ */
if (INTEL_GEN(ctx->i915) >= 11) {
GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
static u64 execlists_update_context(struct i915_request *rq)
{
- struct intel_context *ce = to_intel_context(rq->ctx, rq->engine);
+ struct intel_context *ce = rq->hw_context;
struct i915_hw_ppgtt *ppgtt =
- rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
+ rq->gem_context->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
u32 *reg_state = ce->lrc_reg_state;
reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
* PML4 is allocated during ppgtt init, so this is not needed
* in 48-bit mode.
*/
- if (ppgtt && !i915_vm_is_48bit(&ppgtt->base))
+ if (ppgtt && !i915_vm_is_48bit(&ppgtt->vm))
execlists_update_context_pdps(ppgtt, reg_state);
return ce->lrc_desc;
execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
}
- static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
+ static bool ctx_single_port_submission(const struct intel_context *ce)
{
return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
- i915_gem_context_force_single_submission(ctx));
+ i915_gem_context_force_single_submission(ce->gem_context));
}
- static bool can_merge_ctx(const struct i915_gem_context *prev,
- const struct i915_gem_context *next)
+ static bool can_merge_ctx(const struct intel_context *prev,
+ const struct intel_context *next)
{
if (prev != next)
return false;
if (execlists->ctrl_reg)
writel(EL_CTRL_LOAD, execlists->ctrl_reg);
- execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
- execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
+ execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
+ execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
+ }
+
+ static void complete_preempt_context(struct intel_engine_execlists *execlists)
+ {
+ GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
+
+ execlists_cancel_port_requests(execlists);
+ execlists_unwind_incomplete_requests(execlists);
+
+ execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
}
static bool __execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(!execlists_is_active(execlists,
EXECLISTS_ACTIVE_USER));
GEM_BUG_ON(!port_count(&port[0]));
- if (port_count(&port[0]) > 1)
- return false;
/*
* If we write to ELSP a second time before the HW has had
* second request, and so we never need to tell the
* hardware about the first.
*/
- if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
+ if (last &&
+ !can_merge_ctx(rq->hw_context, last->hw_context)) {
/*
* If we are on the second port and cannot
* combine this request with the last, then we
* the same context (even though a different
* request) to the second port.
*/
- if (ctx_single_port_submission(last->ctx) ||
- ctx_single_port_submission(rq->ctx)) {
+ if (ctx_single_port_submission(last->hw_context) ||
+ ctx_single_port_submission(rq->hw_context)) {
__list_del_many(&p->requests,
&rq->sched.link);
goto done;
}
- GEM_BUG_ON(last->ctx == rq->ctx);
+ GEM_BUG_ON(last->hw_context == rq->hw_context);
if (submit)
port_assign(port, last);
port++;
}
- execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
execlists_user_end(execlists);
}
local_irq_restore(flags);
}
- /*
- * Check the unread Context Status Buffers and manage the submission of new
- * contexts to the ELSP accordingly.
- */
- static void execlists_submission_tasklet(unsigned long data)
+ static void process_csb(struct intel_engine_cs *engine)
{
- struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
struct intel_engine_execlists * const execlists = &engine->execlists;
struct execlist_port *port = execlists->port;
- struct drm_i915_private *dev_priv = engine->i915;
+ struct drm_i915_private *i915 = engine->i915;
bool fw = false;
- /*
- * We can skip acquiring intel_runtime_pm_get() here as it was taken
- * on our behalf by the request (see i915_gem_mark_busy()) and it will
- * not be relinquished until the device is idle (see
- * i915_gem_idle_work_handler()). As a precaution, we make sure
- * that all ELSP are drained i.e. we have processed the CSB,
- * before allowing ourselves to idle and calling intel_runtime_pm_put().
- */
- GEM_BUG_ON(!dev_priv->gt.awake);
-
- /*
- * Prefer doing test_and_clear_bit() as a two stage operation to avoid
- * imposing the cost of a locked atomic transaction when submitting a
- * new request (outside of the context-switch interrupt).
- */
- while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
+ do {
/* The HWSP contains a (cacheable) mirror of the CSB */
const u32 *buf =
&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
if (unlikely(execlists->csb_use_mmio)) {
buf = (u32 * __force)
- (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
- execlists->csb_head = -1; /* force mmio read of CSB ptrs */
+ (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+ execlists->csb_head = -1; /* force mmio read of CSB */
}
/* Clear before reading to catch new interrupts */
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
smp_mb__after_atomic();
- if (unlikely(execlists->csb_head == -1)) { /* following a reset */
+ if (unlikely(execlists->csb_head == -1)) { /* after a reset */
if (!fw) {
- intel_uncore_forcewake_get(dev_priv,
- execlists->fw_domains);
+ intel_uncore_forcewake_get(i915, execlists->fw_domains);
fw = true;
}
- head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+ head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
tail = GEN8_CSB_WRITE_PTR(head);
head = GEN8_CSB_READ_PTR(head);
execlists->csb_head = head;
} else {
const int write_idx =
- intel_hws_csb_write_index(dev_priv) -
+ intel_hws_csb_write_index(i915) -
I915_HWS_CSB_BUF0_INDEX;
head = execlists->csb_head;
}
GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
engine->name,
- head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
- tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
+ head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
+ tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
while (head != tail) {
struct i915_request *rq;
if (++head == GEN8_CSB_ENTRIES)
head = 0;
- /* We are flying near dragons again.
+ /*
+ * We are flying near dragons again.
*
* We hold a reference to the request in execlist_port[]
* but no more than that. We are operating in softirq
if (status & GEN8_CTX_STATUS_COMPLETE &&
buf[2*head + 1] == execlists->preempt_complete_status) {
GEM_TRACE("%s preempt-idle\n", engine->name);
-
- execlists_cancel_port_requests(execlists);
- execlists_unwind_incomplete_requests(execlists);
-
- GEM_BUG_ON(!execlists_is_active(execlists,
- EXECLISTS_ACTIVE_PREEMPT));
- execlists_clear_active(execlists,
- EXECLISTS_ACTIVE_PREEMPT);
+ complete_preempt_context(execlists);
continue;
}
if (head != execlists->csb_head) {
execlists->csb_head = head;
writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
- dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+ i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
}
- }
+ } while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
- if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
- execlists_dequeue(engine);
+ if (unlikely(fw))
+ intel_uncore_forcewake_put(i915, execlists->fw_domains);
+ }
+
+ /*
+ * Check the unread Context Status Buffers and manage the submission of new
+ * contexts to the ELSP accordingly.
+ */
+ static void execlists_submission_tasklet(unsigned long data)
+ {
+ struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
+
+ GEM_TRACE("%s awake?=%d, active=%x, irq-posted?=%d\n",
+ engine->name,
+ engine->i915->gt.awake,
+ engine->execlists.active,
+ test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
+
+ /*
+ * We can skip acquiring intel_runtime_pm_get() here as it was taken
+ * on our behalf by the request (see i915_gem_mark_busy()) and it will
+ * not be relinquished until the device is idle (see
+ * i915_gem_idle_work_handler()). As a precaution, we make sure
+ * that all ELSP are drained i.e. we have processed the CSB,
+ * before allowing ourselves to idle and calling intel_runtime_pm_put().
+ */
+ GEM_BUG_ON(!engine->i915->gt.awake);
- if (fw)
- intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
+ /*
+ * Prefer doing test_and_clear_bit() as a two stage operation to avoid
+ * imposing the cost of a locked atomic transaction when submitting a
+ * new request (outside of the context-switch interrupt).
+ */
+ if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
+ process_csb(engine);
+
+ if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
+ execlists_dequeue(engine);
/* If the engine is now idle, so should be the flag; and vice versa. */
GEM_BUG_ON(execlists_is_active(&engine->execlists,
spin_unlock_irq(&engine->timeline.lock);
}
+ static void execlists_context_destroy(struct intel_context *ce)
+ {
+ GEM_BUG_ON(!ce->state);
+ GEM_BUG_ON(ce->pin_count);
+
+ intel_ring_free(ce->ring);
+ __i915_gem_object_release_unless_active(ce->state->obj);
+ }
+
+ static void execlists_context_unpin(struct intel_context *ce)
+ {
+ intel_ring_unpin(ce->ring);
+
+ ce->state->obj->pin_global--;
+ i915_gem_object_unpin_map(ce->state->obj);
+ i915_vma_unpin(ce->state);
+
+ i915_gem_context_put(ce->gem_context);
+ }
+
static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
{
unsigned int flags;
return i915_vma_pin(vma, 0, GEN8_LR_CONTEXT_ALIGN, flags);
}
- static struct intel_ring *
- execlists_context_pin(struct intel_engine_cs *engine,
- struct i915_gem_context *ctx)
+ static struct intel_context *
+ __execlists_context_pin(struct intel_engine_cs *engine,
+ struct i915_gem_context *ctx,
+ struct intel_context *ce)
{
- struct intel_context *ce = to_intel_context(ctx, engine);
void *vaddr;
int ret;
- lockdep_assert_held(&ctx->i915->drm.struct_mutex);
-
- if (likely(ce->pin_count++))
- goto out;
- GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
-
- ret = execlists_context_deferred_alloc(ctx, engine);
+ ret = execlists_context_deferred_alloc(ctx, engine, ce);
if (ret)
goto err;
GEM_BUG_ON(!ce->state);
if (ret)
goto unpin_map;
- intel_lr_context_descriptor_update(ctx, engine);
+ intel_lr_context_descriptor_update(ctx, engine, ce);
ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
ce->state->obj->pin_global++;
i915_gem_context_get(ctx);
- out:
- return ce->ring;
+ return ce;
unpin_map:
i915_gem_object_unpin_map(ce->state->obj);
return ERR_PTR(ret);
}
- static void execlists_context_unpin(struct intel_engine_cs *engine,
- struct i915_gem_context *ctx)
+ static const struct intel_context_ops execlists_context_ops = {
+ .unpin = execlists_context_unpin,
+ .destroy = execlists_context_destroy,
+ };
+
+ static struct intel_context *
+ execlists_context_pin(struct intel_engine_cs *engine,
+ struct i915_gem_context *ctx)
{
struct intel_context *ce = to_intel_context(ctx, engine);
lockdep_assert_held(&ctx->i915->drm.struct_mutex);
- GEM_BUG_ON(ce->pin_count == 0);
- if (--ce->pin_count)
- return;
-
- intel_ring_unpin(ce->ring);
+ if (likely(ce->pin_count++))
+ return ce;
+ GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
- ce->state->obj->pin_global--;
- i915_gem_object_unpin_map(ce->state->obj);
- i915_vma_unpin(ce->state);
+ ce->ops = &execlists_context_ops;
- i915_gem_context_put(ctx);
+ return __execlists_context_pin(engine, ctx, ce);
}
static int execlists_request_alloc(struct i915_request *request)
{
- struct intel_context *ce =
- to_intel_context(request->ctx, request->engine);
int ret;
- GEM_BUG_ON(!ce->pin_count);
+ GEM_BUG_ON(!request->hw_context->pin_count);
/* Flush enough space to reduce the likelihood of waiting after
* we start building the request - in which case we will just
if (IS_ERR(obj))
return PTR_ERR(obj);
- vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
+ vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err;
I915_WRITE(RING_MODE_GEN7(engine),
_MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
+ I915_WRITE(RING_MI_MODE(engine->mmio_base),
+ _MASKED_BIT_DISABLE(STOP_RING));
+
I915_WRITE(RING_HWS_PGA(engine->mmio_base),
engine->status_page.ggtt_offset);
POSTING_READ(RING_HWS_PGA(engine->mmio_base));
+ }
- /* Following the reset, we need to reload the CSB read/write pointers */
- engine->execlists.csb_head = -1;
+ static bool unexpected_starting_state(struct intel_engine_cs *engine)
+ {
+ struct drm_i915_private *dev_priv = engine->i915;
+ bool unexpected = false;
+
+ if (I915_READ(RING_MI_MODE(engine->mmio_base)) & STOP_RING) {
+ DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
+ unexpected = true;
+ }
+
+ return unexpected;
}
static int gen8_init_common_ring(struct intel_engine_cs *engine)
intel_engine_reset_breadcrumbs(engine);
intel_engine_init_hangcheck(engine);
+ if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) {
+ struct drm_printer p = drm_debug_printer(__func__);
+
+ intel_engine_dump(engine, &p, NULL);
+ }
+
enable_execlists(engine);
/* After a GPU reset, we may have requests to replay */
return 0;
}
- static void reset_common_ring(struct intel_engine_cs *engine,
- struct i915_request *request)
+ static struct i915_request *
+ execlists_reset_prepare(struct intel_engine_cs *engine)
+ {
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct i915_request *request, *active;
+
+ GEM_TRACE("%s\n", engine->name);
+
+ /*
+ * Prevent request submission to the hardware until we have
+ * completed the reset in i915_gem_reset_finish(). If a request
+ * is completed by one engine, it may then queue a request
+ * to a second via its execlists->tasklet *just* as we are
+ * calling engine->init_hw() and also writing the ELSP.
+ * Turning off the execlists->tasklet until the reset is over
+ * prevents the race.
+ */
+ __tasklet_disable_sync_once(&execlists->tasklet);
+
+ /*
+ * We want to flush the pending context switches, having disabled
+ * the tasklet above, we can assume exclusive access to the execlists.
+ * For this allows us to catch up with an inflight preemption event,
+ * and avoid blaming an innocent request if the stall was due to the
+ * preemption itself.
+ */
+ if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
+ process_csb(engine);
+
+ /*
+ * The last active request can then be no later than the last request
+ * now in ELSP[0]. So search backwards from there, so that if the GPU
+ * has advanced beyond the last CSB update, it will be pardoned.
+ */
+ active = NULL;
+ request = port_request(execlists->port);
+ if (request) {
+ unsigned long flags;
+
+ /*
+ * Prevent the breadcrumb from advancing before we decide
+ * which request is currently active.
+ */
+ intel_engine_stop_cs(engine);
+
+ spin_lock_irqsave(&engine->timeline.lock, flags);
+ list_for_each_entry_from_reverse(request,
+ &engine->timeline.requests,
+ link) {
+ if (__i915_request_completed(request,
+ request->global_seqno))
+ break;
+
+ active = request;
+ }
+ spin_unlock_irqrestore(&engine->timeline.lock, flags);
+ }
+
+ return active;
+ }
+
+ static void execlists_reset(struct intel_engine_cs *engine,
+ struct i915_request *request)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
unsigned long flags;
__unwind_incomplete_requests(engine);
spin_unlock(&engine->timeline.lock);
+ /* Following the reset, we need to reload the CSB read/write pointers */
+ engine->execlists.csb_head = -1;
+
local_irq_restore(flags);
/*
* future request will be after userspace has had the opportunity
* to recreate its own state.
*/
- regs = to_intel_context(request->ctx, engine)->lrc_reg_state;
- if (engine->default_state) {
- void *defaults;
-
- defaults = i915_gem_object_pin_map(engine->default_state,
- I915_MAP_WB);
- if (!IS_ERR(defaults)) {
- memcpy(regs, /* skip restoring the vanilla PPHWSP */
- defaults + LRC_STATE_PN * PAGE_SIZE,
- engine->context_size - PAGE_SIZE);
- i915_gem_object_unpin_map(engine->default_state);
- }
+ regs = request->hw_context->lrc_reg_state;
+ if (engine->pinned_default_state) {
+ memcpy(regs, /* skip restoring the vanilla PPHWSP */
+ engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
+ engine->context_size - PAGE_SIZE);
}
- execlists_init_reg_state(regs, request->ctx, engine, request->ring);
+ execlists_init_reg_state(regs,
+ request->gem_context, engine, request->ring);
/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma);
unwind_wa_tail(request);
}
+ static void execlists_reset_finish(struct intel_engine_cs *engine)
+ {
+ /*
+ * Flush the tasklet while we still have the forcewake to be sure
+ * that it is not allowed to sleep before we restart and reload a
+ * context.
+ *
+ * As before (with execlists_reset_prepare) we rely on the caller
+ * serialising multiple attempts to reset so that we know that we
+ * are the only one manipulating tasklet state.
+ */
+ __tasklet_enable_sync_once(&engine->execlists.tasklet);
+
+ GEM_TRACE("%s\n", engine->name);
+ }
+
static int intel_logical_ring_emit_pdps(struct i915_request *rq)
{
- struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
+ struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt;
struct intel_engine_cs *engine = rq->engine;
const int num_lri_cmds = GEN8_3LVL_PDPES * 2;
u32 *cs;
* it is unsafe in case of lite-restore (because the ctx is
* not idle). PML4 is allocated during ppgtt init so this is
* not needed in 48-bit.*/
- if (rq->ctx->ppgtt &&
- (intel_engine_flag(rq->engine) & rq->ctx->ppgtt->pd_dirty_rings) &&
- !i915_vm_is_48bit(&rq->ctx->ppgtt->base) &&
+ if (rq->gem_context->ppgtt &&
+ (intel_engine_flag(rq->engine) & rq->gem_context->ppgtt->pd_dirty_rings) &&
+ !i915_vm_is_48bit(&rq->gem_context->ppgtt->vm) &&
!intel_vgpu_active(rq->i915)) {
ret = intel_logical_ring_emit_pdps(rq);
if (ret)
return ret;
- rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
+ rq->gem_context->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine);
}
cs = intel_ring_begin(rq, 6);
engine->schedule = execlists_schedule;
engine->execlists.tasklet.func = execlists_submission_tasklet;
+ engine->reset.prepare = execlists_reset_prepare;
+
engine->park = NULL;
engine->unpark = NULL;
{
/* Default vfuncs which can be overriden by each engine. */
engine->init_hw = gen8_init_common_ring;
- engine->reset_hw = reset_common_ring;
- engine->context_pin = execlists_context_pin;
- engine->context_unpin = execlists_context_unpin;
+ engine->reset.prepare = execlists_reset_prepare;
+ engine->reset.reset = execlists_reset;
+ engine->reset.finish = execlists_reset_finish;
+ engine->context_pin = execlists_context_pin;
engine->request_alloc = execlists_request_alloc;
engine->emit_flush = gen8_emit_flush;
upper_32_bits(ce->lrc_desc);
}
+ engine->execlists.csb_head = -1;
+
return 0;
error:
struct drm_i915_private *dev_priv = engine->i915;
struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt;
u32 base = engine->mmio_base;
- bool rcs = engine->id == RCS;
+ bool rcs = engine->class == RENDER_CLASS;
/* A context is actually a big batch buffer with several
* MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), 0);
CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0);
- if (ppgtt && i915_vm_is_48bit(&ppgtt->base)) {
+ if (ppgtt && i915_vm_is_48bit(&ppgtt->vm)) {
/* 64b PPGTT (48bit canonical)
* PDP0_DESCRIPTOR contains the base address to PML4 and
* other PDP Descriptors are ignored.
}
static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
+ struct intel_engine_cs *engine,
+ struct intel_context *ce)
{
struct drm_i915_gem_object *ctx_obj;
- struct intel_context *ce = to_intel_context(ctx, engine);
struct i915_vma *vma;
uint32_t context_size;
struct intel_ring *ring;
goto error_deref_obj;
}
- vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL);
+ vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.vm, NULL);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto error_deref_obj;
/* program plane scaler */
if (plane_state->scaler_id >= 0) {
int scaler_id = plane_state->scaler_id;
- const struct intel_scaler *scaler;
+ const struct intel_scaler *scaler =
+ &crtc_state->scaler_state.scalers[scaler_id];
+ u16 y_hphase, uv_rgb_hphase;
+ u16 y_vphase, uv_rgb_vphase;
+
+ /* TODO: handle sub-pixel coordinates */
+ if (fb->format->format == DRM_FORMAT_NV12) {
+ y_hphase = skl_scaler_calc_phase(1, false);
+ y_vphase = skl_scaler_calc_phase(1, false);
+
+ /* MPEG2 chroma siting convention */
+ uv_rgb_hphase = skl_scaler_calc_phase(2, true);
+ uv_rgb_vphase = skl_scaler_calc_phase(2, false);
+ } else {
+ /* not used */
+ y_hphase = 0;
+ y_vphase = 0;
- scaler = &crtc_state->scaler_state.scalers[scaler_id];
+ uv_rgb_hphase = skl_scaler_calc_phase(1, false);
+ uv_rgb_vphase = skl_scaler_calc_phase(1, false);
+ }
I915_WRITE_FW(SKL_PS_CTRL(pipe, scaler_id),
PS_SCALER_EN | PS_PLANE_SEL(plane_id) | scaler->mode);
I915_WRITE_FW(SKL_PS_PWR_GATE(pipe, scaler_id), 0);
+ I915_WRITE_FW(SKL_PS_VPHASE(pipe, scaler_id),
+ PS_Y_PHASE(y_vphase) | PS_UV_RGB_PHASE(uv_rgb_vphase));
+ I915_WRITE_FW(SKL_PS_HPHASE(pipe, scaler_id),
+ PS_Y_PHASE(y_hphase) | PS_UV_RGB_PHASE(uv_rgb_hphase));
I915_WRITE_FW(SKL_PS_WIN_POS(pipe, scaler_id), (crtc_x << 16) | crtc_y);
I915_WRITE_FW(SKL_PS_WIN_SZ(pipe, scaler_id),
((crtc_w + 1) << 16)|(crtc_h + 1));
}
bool
- skl_plane_get_hw_state(struct intel_plane *plane)
+ skl_plane_get_hw_state(struct intel_plane *plane,
+ enum pipe *pipe)
{
struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
enum intel_display_power_domain power_domain;
enum plane_id plane_id = plane->id;
- enum pipe pipe = plane->pipe;
bool ret;
- power_domain = POWER_DOMAIN_PIPE(pipe);
+ power_domain = POWER_DOMAIN_PIPE(plane->pipe);
if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
return false;
- ret = I915_READ(PLANE_CTL(pipe, plane_id)) & PLANE_CTL_ENABLE;
+ ret = I915_READ(PLANE_CTL(plane->pipe, plane_id)) & PLANE_CTL_ENABLE;
+
+ *pipe = plane->pipe;
intel_display_power_put(dev_priv, power_domain);
}
static bool
- vlv_plane_get_hw_state(struct intel_plane *plane)
+ vlv_plane_get_hw_state(struct intel_plane *plane,
+ enum pipe *pipe)
{
struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
enum intel_display_power_domain power_domain;
enum plane_id plane_id = plane->id;
- enum pipe pipe = plane->pipe;
bool ret;
- power_domain = POWER_DOMAIN_PIPE(pipe);
+ power_domain = POWER_DOMAIN_PIPE(plane->pipe);
if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
return false;
- ret = I915_READ(SPCNTR(pipe, plane_id)) & SP_ENABLE;
+ ret = I915_READ(SPCNTR(plane->pipe, plane_id)) & SP_ENABLE;
+
+ *pipe = plane->pipe;
intel_display_power_put(dev_priv, power_domain);
}
static bool
- ivb_plane_get_hw_state(struct intel_plane *plane)
+ ivb_plane_get_hw_state(struct intel_plane *plane,
+ enum pipe *pipe)
{
struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
enum intel_display_power_domain power_domain;
- enum pipe pipe = plane->pipe;
bool ret;
- power_domain = POWER_DOMAIN_PIPE(pipe);
+ power_domain = POWER_DOMAIN_PIPE(plane->pipe);
if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
return false;
- ret = I915_READ(SPRCTL(pipe)) & SPRITE_ENABLE;
+ ret = I915_READ(SPRCTL(plane->pipe)) & SPRITE_ENABLE;
+
+ *pipe = plane->pipe;
intel_display_power_put(dev_priv, power_domain);
}
static bool
- g4x_plane_get_hw_state(struct intel_plane *plane)
+ g4x_plane_get_hw_state(struct intel_plane *plane,
+ enum pipe *pipe)
{
struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
enum intel_display_power_domain power_domain;
- enum pipe pipe = plane->pipe;
bool ret;
- power_domain = POWER_DOMAIN_PIPE(pipe);
+ power_domain = POWER_DOMAIN_PIPE(plane->pipe);
if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
return false;
- ret = I915_READ(DVSCNTR(pipe)) & DVS_ENABLE;
+ ret = I915_READ(DVSCNTR(plane->pipe)) & DVS_ENABLE;
+
+ *pipe = plane->pipe;
intel_display_power_put(dev_priv, power_domain);
struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
struct drm_framebuffer *fb = state->base.fb;
- int crtc_x, crtc_y;
- unsigned int crtc_w, crtc_h;
- uint32_t src_x, src_y, src_w, src_h;
- struct drm_rect *src = &state->base.src;
- struct drm_rect *dst = &state->base.dst;
- struct drm_rect clip = {};
int max_stride = INTEL_GEN(dev_priv) >= 9 ? 32768 : 16384;
- int hscale, vscale;
int max_scale, min_scale;
bool can_scale;
int ret;
uint32_t pixel_format = 0;
- *src = drm_plane_state_src(&state->base);
- *dst = drm_plane_state_dest(&state->base);
-
if (!fb) {
state->base.visible = false;
return 0;
min_scale = plane->can_scale ? 1 : (1 << 16);
}
- /*
- * FIXME the following code does a bunch of fuzzy adjustments to the
- * coordinates and sizes. We probably need some way to decide whether
- * more strict checking should be done instead.
- */
- drm_rect_rotate(src, fb->width << 16, fb->height << 16,
- state->base.rotation);
-
- hscale = drm_rect_calc_hscale_relaxed(src, dst, min_scale, max_scale);
- BUG_ON(hscale < 0);
-
- vscale = drm_rect_calc_vscale_relaxed(src, dst, min_scale, max_scale);
- BUG_ON(vscale < 0);
-
- if (crtc_state->base.enable)
- drm_mode_get_hv_timing(&crtc_state->base.mode,
- &clip.x2, &clip.y2);
-
- state->base.visible = drm_rect_clip_scaled(src, dst, &clip, hscale, vscale);
-
- crtc_x = dst->x1;
- crtc_y = dst->y1;
- crtc_w = drm_rect_width(dst);
- crtc_h = drm_rect_height(dst);
+ ret = drm_atomic_helper_check_plane_state(&state->base,
+ &crtc_state->base,
+ min_scale, max_scale,
+ true, true);
+ if (ret)
+ return ret;
if (state->base.visible) {
- /* check again in case clipping clamped the results */
- hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale);
- if (hscale < 0) {
- DRM_DEBUG_KMS("Horizontal scaling factor out of limits\n");
- drm_rect_debug_print("src: ", src, true);
- drm_rect_debug_print("dst: ", dst, false);
-
- return hscale;
- }
-
- vscale = drm_rect_calc_vscale(src, dst, min_scale, max_scale);
- if (vscale < 0) {
- DRM_DEBUG_KMS("Vertical scaling factor out of limits\n");
- drm_rect_debug_print("src: ", src, true);
- drm_rect_debug_print("dst: ", dst, false);
-
- return vscale;
- }
-
- /* Make the source viewport size an exact multiple of the scaling factors. */
- drm_rect_adjust_size(src,
- drm_rect_width(dst) * hscale - drm_rect_width(src),
- drm_rect_height(dst) * vscale - drm_rect_height(src));
-
- drm_rect_rotate_inv(src, fb->width << 16, fb->height << 16,
- state->base.rotation);
-
- /* sanity check to make sure the src viewport wasn't enlarged */
- WARN_ON(src->x1 < (int) state->base.src_x ||
- src->y1 < (int) state->base.src_y ||
- src->x2 > (int) state->base.src_x + state->base.src_w ||
- src->y2 > (int) state->base.src_y + state->base.src_h);
+ struct drm_rect *src = &state->base.src;
+ struct drm_rect *dst = &state->base.dst;
+ unsigned int crtc_w = drm_rect_width(dst);
+ unsigned int crtc_h = drm_rect_height(dst);
+ uint32_t src_x, src_y, src_w, src_h;
/*
* Hardware doesn't handle subpixel coordinates.
src_y = src->y1 >> 16;
src_h = drm_rect_height(src) >> 16;
+ src->x1 = src_x << 16;
+ src->x2 = (src_x + src_w) << 16;
+ src->y1 = src_y << 16;
+ src->y2 = (src_y + src_h) << 16;
+
if (intel_format_is_yuv(fb->format->format) &&
- fb->format->format != DRM_FORMAT_NV12) {
- src_x &= ~1;
- src_w &= ~1;
-
- /*
- * Must keep src and dst the
- * same if we can't scale.
- */
- if (!can_scale)
- crtc_w &= ~1;
-
- if (crtc_w == 0)
- state->base.visible = false;
+ fb->format->format != DRM_FORMAT_NV12 &&
+ (src_x % 2 || src_w % 2)) {
+ DRM_DEBUG_KMS("src x/w (%u, %u) must be a multiple of 2 for YUV planes\n",
+ src_x, src_w);
+ return -EINVAL;
}
- }
- /* Check size restrictions when scaling */
- if (state->base.visible && (src_w != crtc_w || src_h != crtc_h)) {
- unsigned int width_bytes;
- int cpp = fb->format->cpp[0];
+ /* Check size restrictions when scaling */
+ if (src_w != crtc_w || src_h != crtc_h) {
+ unsigned int width_bytes;
+ int cpp = fb->format->cpp[0];
- WARN_ON(!can_scale);
+ WARN_ON(!can_scale);
- /* FIXME interlacing min height is 6 */
+ width_bytes = ((src_x * cpp) & 63) + src_w * cpp;
- if (crtc_w < 3 || crtc_h < 3)
- state->base.visible = false;
-
- if (src_w < 3 || src_h < 3)
- state->base.visible = false;
-
- width_bytes = ((src_x * cpp) & 63) + src_w * cpp;
-
- if (INTEL_GEN(dev_priv) < 9 && (src_w > 2048 || src_h > 2048 ||
- width_bytes > 4096 || fb->pitches[0] > 4096)) {
- DRM_DEBUG_KMS("Source dimensions exceed hardware limits\n");
- return -EINVAL;
+ /* FIXME interlacing min height is 6 */
+ if (INTEL_GEN(dev_priv) < 9 && (
+ src_w < 3 || src_h < 3 ||
+ src_w > 2048 || src_h > 2048 ||
+ crtc_w < 3 || crtc_h < 3 ||
+ width_bytes > 4096 || fb->pitches[0] > 4096)) {
+ DRM_DEBUG_KMS("Source dimensions exceed hardware limits\n");
+ return -EINVAL;
+ }
}
}
- if (state->base.visible) {
- src->x1 = src_x << 16;
- src->x2 = (src_x + src_w) << 16;
- src->y1 = src_y << 16;
- src->y2 = (src_y + src_h) << 16;
- }
-
- dst->x1 = crtc_x;
- dst->x2 = crtc_x + crtc_w;
- dst->y1 = crtc_y;
- dst->y2 = crtc_y + crtc_h;
-
if (INTEL_GEN(dev_priv) >= 9) {
ret = skl_check_plane_surface(crtc_state, state);
if (ret)
}
static bool intel_sprite_plane_format_mod_supported(struct drm_plane *plane,
- uint32_t format,
- uint64_t modifier)
+ uint32_t format,
+ uint64_t modifier)
{
struct drm_i915_private *dev_priv = to_i915(plane->dev);
}
static const struct drm_plane_funcs intel_sprite_plane_funcs = {
- .update_plane = drm_atomic_helper_update_plane,
- .disable_plane = drm_atomic_helper_disable_plane,
- .destroy = intel_plane_destroy,
- .atomic_get_property = intel_plane_atomic_get_property,
- .atomic_set_property = intel_plane_atomic_set_property,
- .atomic_duplicate_state = intel_plane_duplicate_state,
- .atomic_destroy_state = intel_plane_destroy_state,
- .format_mod_supported = intel_sprite_plane_format_mod_supported,
+ .update_plane = drm_atomic_helper_update_plane,
+ .disable_plane = drm_atomic_helper_disable_plane,
+ .destroy = intel_plane_destroy,
+ .atomic_get_property = intel_plane_atomic_get_property,
+ .atomic_set_property = intel_plane_atomic_set_property,
+ .atomic_duplicate_state = intel_plane_duplicate_state,
+ .atomic_destroy_state = intel_plane_destroy_state,
+ .format_mod_supported = intel_sprite_plane_format_mod_supported,
};
bool skl_plane_has_ccs(struct drm_i915_private *dev_priv,