2 * SPDX-License-Identifier: MIT
4 * Copyright © 2014-2018 Intel Corporation
8 #include "intel_context.h"
9 #include "intel_engine_pm.h"
11 #include "intel_ring.h"
12 #include "intel_workarounds.h"
15 * DOC: Hardware workarounds
17 * This file is intended as a central place to implement most [1]_ of the
18 * required workarounds for hardware to work as originally intended. They fall
19 * in five basic categories depending on how/when they are applied:
21 * - Workarounds that touch registers that are saved/restored to/from the HW
22 * context image. The list is emitted (via Load Register Immediate commands)
23 every time a new context is created.
24 * - GT workarounds. The list of these WAs is applied whenever these registers
25 * revert to default values (on GPU reset, suspend/resume [2]_, etc..).
26 * - Display workarounds. The list is applied during display clock-gating
28 * - Workarounds that whitelist a privileged register, so that UMDs can manage
29 them directly. This is just a special case of an MMIO workaround (as we
30 * write the list of these to/be-whitelisted registers to some special HW
32 * - Workaround batchbuffers, that get executed automatically by the hardware
33 * on every HW context restore.
35 * .. [1] Please notice that there are other WAs that, due to their nature,
36 * cannot be applied from a central place. Those are peppered around the rest
37 * of the code, as needed.
39 * .. [2] Technically, some registers are powercontext saved & restored, so they
40 * survive a suspend/resume. In practice, writing them again is not too
41 * costly and simplifies things. We can revisit this in the future.
46 * Keep things in this file ordered by WA type, as per the above (context, GT,
47 * display, register whitelist, batchbuffer). Then, inside each type, keep the
50 * - Infrastructure functions and macros
51 * - WAs per platform in standard gen/chrono order
52 * - Public functions to init or apply the given workaround type.
56 * KBL revision ID ordering is bizarre; higher revision ID's map to lower
57 * steppings in some cases. So rather than test against the revision ID
58 * directly, let's map that into our own range of increasing ID's that we
59 * can test against in a regular manner.
62 const struct i915_rev_steppings kbl_revids[] = {
63 [0] = { .gt_stepping = KBL_REVID_A0, .disp_stepping = KBL_REVID_A0 },
64 [1] = { .gt_stepping = KBL_REVID_B0, .disp_stepping = KBL_REVID_B0 },
65 [2] = { .gt_stepping = KBL_REVID_C0, .disp_stepping = KBL_REVID_B0 },
66 [3] = { .gt_stepping = KBL_REVID_D0, .disp_stepping = KBL_REVID_B0 },
67 [4] = { .gt_stepping = KBL_REVID_F0, .disp_stepping = KBL_REVID_C0 },
68 [5] = { .gt_stepping = KBL_REVID_C0, .disp_stepping = KBL_REVID_B1 },
69 [6] = { .gt_stepping = KBL_REVID_D1, .disp_stepping = KBL_REVID_B1 },
70 [7] = { .gt_stepping = KBL_REVID_G0, .disp_stepping = KBL_REVID_C0 },
73 const struct i915_rev_steppings tgl_uy_revids[] = {
74 [0] = { .gt_stepping = TGL_REVID_A0, .disp_stepping = TGL_REVID_A0 },
75 [1] = { .gt_stepping = TGL_REVID_B0, .disp_stepping = TGL_REVID_C0 },
76 [2] = { .gt_stepping = TGL_REVID_B1, .disp_stepping = TGL_REVID_C0 },
77 [3] = { .gt_stepping = TGL_REVID_C0, .disp_stepping = TGL_REVID_D0 },
80 /* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */
81 const struct i915_rev_steppings tgl_revids[] = {
82 [0] = { .gt_stepping = TGL_REVID_A0, .disp_stepping = TGL_REVID_B0 },
83 [1] = { .gt_stepping = TGL_REVID_B0, .disp_stepping = TGL_REVID_D0 },
86 static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name)
89 wal->engine_name = engine_name;
92 #define WA_LIST_CHUNK (1 << 4)
94 static void wa_init_finish(struct i915_wa_list *wal)
96 /* Trim unused entries. */
97 if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
98 struct i915_wa *list = kmemdup(wal->list,
99 wal->count * sizeof(*list),
111 DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n",
112 wal->wa_count, wal->name, wal->engine_name);
115 static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
117 unsigned int addr = i915_mmio_reg_offset(wa->reg);
118 unsigned int start = 0, end = wal->count;
119 const unsigned int grow = WA_LIST_CHUNK;
122 GEM_BUG_ON(!is_power_of_2(grow));
124 if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
125 struct i915_wa *list;
127 list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
130 DRM_ERROR("No space for workaround init!\n");
135 memcpy(list, wal->list, sizeof(*wa) * wal->count);
140 while (start < end) {
141 unsigned int mid = start + (end - start) / 2;
143 if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
145 } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
148 wa_ = &wal->list[mid];
150 if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
151 DRM_ERROR("Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
152 i915_mmio_reg_offset(wa_->reg),
155 wa_->set &= ~wa->clr;
161 wa_->read |= wa->read;
167 wa_ = &wal->list[wal->count++];
170 while (wa_-- > wal->list) {
171 GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
172 i915_mmio_reg_offset(wa_[1].reg));
173 if (i915_mmio_reg_offset(wa_[1].reg) >
174 i915_mmio_reg_offset(wa_[0].reg))
177 swap(wa_[1], wa_[0]);
181 static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
182 u32 clear, u32 set, u32 read_mask)
184 struct i915_wa wa = {
195 wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
197 wa_add(wal, reg, clear, set, clear);
201 wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
203 wa_write_masked_or(wal, reg, ~0, set);
207 wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
209 wa_write_masked_or(wal, reg, set, set);
213 wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
215 wa_write_masked_or(wal, reg, clr, 0);
219 wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
221 wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val);
225 wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
227 wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val);
230 #define WA_SET_BIT_MASKED(addr, mask) \
231 wa_masked_en(wal, (addr), (mask))
233 #define WA_CLR_BIT_MASKED(addr, mask) \
234 wa_masked_dis(wal, (addr), (mask))
236 #define WA_SET_FIELD_MASKED(addr, mask, value) \
237 wa_write_masked_or(wal, (addr), 0, _MASKED_FIELD((mask), (value)))
239 static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
240 struct i915_wa_list *wal)
242 WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
245 static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
246 struct i915_wa_list *wal)
248 WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
251 static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
252 struct i915_wa_list *wal)
254 WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
256 /* WaDisableAsyncFlipPerfMode:bdw,chv */
257 WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
259 /* WaDisablePartialInstShootdown:bdw,chv */
260 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
261 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
263 /* Use Force Non-Coherent whenever executing a 3D context. This is a
264 * workaround for for a possible hang in the unlikely event a TLB
265 * invalidation occurs during a PSD flush.
267 /* WaForceEnableNonCoherent:bdw,chv */
268 /* WaHdcDisableFetchWhenMasked:bdw,chv */
269 WA_SET_BIT_MASKED(HDC_CHICKEN0,
270 HDC_DONOT_FETCH_MEM_WHEN_MASKED |
271 HDC_FORCE_NON_COHERENT);
273 /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
274 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
275 * polygons in the same 8x4 pixel/sample area to be processed without
276 * stalling waiting for the earlier ones to write to Hierarchical Z
279 * This optimization is off by default for BDW and CHV; turn it on.
281 WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
283 /* Wa4x4STCOptimizationDisable:bdw,chv */
284 WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
287 * BSpec recommends 8x4 when MSAA is used,
288 * however in practice 16x4 seems fastest.
290 * Note that PS/WM thread counts depend on the WIZ hashing
291 * disable bit, which we don't touch here, but it's good
292 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
294 WA_SET_FIELD_MASKED(GEN7_GT_MODE,
295 GEN6_WIZ_HASHING_MASK,
296 GEN6_WIZ_HASHING_16x4);
299 static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
300 struct i915_wa_list *wal)
302 struct drm_i915_private *i915 = engine->i915;
304 gen8_ctx_workarounds_init(engine, wal);
306 /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
307 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
309 /* WaDisableDopClockGating:bdw
311 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
312 * to disable EUTC clock gating.
314 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
315 DOP_CLOCK_GATING_DISABLE);
317 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
318 GEN8_SAMPLER_POWER_BYPASS_DIS);
320 WA_SET_BIT_MASKED(HDC_CHICKEN0,
321 /* WaForceContextSaveRestoreNonCoherent:bdw */
322 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
323 /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
324 (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
327 static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
328 struct i915_wa_list *wal)
330 gen8_ctx_workarounds_init(engine, wal);
332 /* WaDisableThreadStallDopClockGating:chv */
333 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
335 /* Improve HiZ throughput on CHV. */
336 WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
339 static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
340 struct i915_wa_list *wal)
342 struct drm_i915_private *i915 = engine->i915;
345 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
347 * Must match Display Engine. See
348 * WaCompressedResourceDisplayNewHashMode.
350 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
351 GEN9_PBE_COMPRESSED_HASH_SELECTION);
352 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
353 GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
356 /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
357 /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
358 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
359 FLOW_CONTROL_ENABLE |
360 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
362 /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
363 /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
364 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
365 GEN9_ENABLE_YV12_BUGFIX |
366 GEN9_ENABLE_GPGPU_PREEMPTION);
368 /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
369 /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
370 WA_SET_BIT_MASKED(CACHE_MODE_1,
371 GEN8_4x4_STC_OPTIMIZATION_DISABLE |
372 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
374 /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
375 WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
376 GEN9_CCS_TLB_PREFETCH_ENABLE);
378 /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
379 WA_SET_BIT_MASKED(HDC_CHICKEN0,
380 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
381 HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
383 /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
384 * both tied to WaForceContextSaveRestoreNonCoherent
385 * in some hsds for skl. We keep the tie for all gen9. The
386 * documentation is a bit hazy and so we want to get common behaviour,
387 * even though there is no clear evidence we would need both on kbl/bxt.
388 * This area has been source of system hangs so we play it safe
389 * and mimic the skl regardless of what bspec says.
391 * Use Force Non-Coherent whenever executing a 3D context. This
392 * is a workaround for a possible hang in the unlikely event
393 * a TLB invalidation occurs during a PSD flush.
396 /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
397 WA_SET_BIT_MASKED(HDC_CHICKEN0,
398 HDC_FORCE_NON_COHERENT);
400 /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
401 if (IS_SKYLAKE(i915) ||
403 IS_COFFEELAKE(i915) ||
405 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
406 GEN8_SAMPLER_POWER_BYPASS_DIS);
408 /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
409 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
412 * Supporting preemption with fine-granularity requires changes in the
413 * batch buffer programming. Since we can't break old userspace, we
414 * need to set our default preemption level to safe value. Userspace is
415 * still able to use more fine-grained preemption levels, since in
416 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
417 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
418 * not real HW workarounds, but merely a way to start using preemption
419 * while maintaining old contract with userspace.
422 /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
423 WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
425 /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
426 WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
427 GEN9_PREEMPT_GPGPU_LEVEL_MASK,
428 GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
430 /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
431 if (IS_GEN9_LP(i915))
432 WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
435 static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
436 struct i915_wa_list *wal)
438 struct intel_gt *gt = engine->gt;
439 u8 vals[3] = { 0, 0, 0 };
442 for (i = 0; i < 3; i++) {
446 * Only consider slices where one, and only one, subslice has 7
449 if (!is_power_of_2(gt->info.sseu.subslice_7eu[i]))
453 * subslice_7eu[i] != 0 (because of the check above) and
454 * ss_max == 4 (maximum number of subslices possible per slice)
458 ss = ffs(gt->info.sseu.subslice_7eu[i]) - 1;
462 if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
465 /* Tune IZ hashing. See intel_device_info_runtime_init() */
466 WA_SET_FIELD_MASKED(GEN7_GT_MODE,
467 GEN9_IZ_HASHING_MASK(2) |
468 GEN9_IZ_HASHING_MASK(1) |
469 GEN9_IZ_HASHING_MASK(0),
470 GEN9_IZ_HASHING(2, vals[2]) |
471 GEN9_IZ_HASHING(1, vals[1]) |
472 GEN9_IZ_HASHING(0, vals[0]));
475 static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
476 struct i915_wa_list *wal)
478 gen9_ctx_workarounds_init(engine, wal);
479 skl_tune_iz_hashing(engine, wal);
482 static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
483 struct i915_wa_list *wal)
485 gen9_ctx_workarounds_init(engine, wal);
487 /* WaDisableThreadStallDopClockGating:bxt */
488 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
489 STALL_DOP_GATING_DISABLE);
491 /* WaToEnableHwFixForPushConstHWBug:bxt */
492 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
493 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
496 static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
497 struct i915_wa_list *wal)
499 struct drm_i915_private *i915 = engine->i915;
501 gen9_ctx_workarounds_init(engine, wal);
503 /* WaToEnableHwFixForPushConstHWBug:kbl */
504 if (IS_KBL_GT_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
505 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
506 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
508 /* WaDisableSbeCacheDispatchPortSharing:kbl */
509 WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
510 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
513 static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
514 struct i915_wa_list *wal)
516 gen9_ctx_workarounds_init(engine, wal);
518 /* WaToEnableHwFixForPushConstHWBug:glk */
519 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
520 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
523 static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
524 struct i915_wa_list *wal)
526 gen9_ctx_workarounds_init(engine, wal);
528 /* WaToEnableHwFixForPushConstHWBug:cfl */
529 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
530 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
532 /* WaDisableSbeCacheDispatchPortSharing:cfl */
533 WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
534 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
537 static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
538 struct i915_wa_list *wal)
540 /* WaForceContextSaveRestoreNonCoherent:cnl */
541 WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
542 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
544 /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
545 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
546 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
548 /* WaPushConstantDereferenceHoldDisable:cnl */
549 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
551 /* FtrEnableFastAnisoL1BankingFix:cnl */
552 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
554 /* WaDisable3DMidCmdPreemption:cnl */
555 WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
557 /* WaDisableGPGPUMidCmdPreemption:cnl */
558 WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
559 GEN9_PREEMPT_GPGPU_LEVEL_MASK,
560 GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
562 /* WaDisableEarlyEOT:cnl */
563 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
566 static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
567 struct i915_wa_list *wal)
569 struct drm_i915_private *i915 = engine->i915;
571 /* WaDisableBankHangMode:icl */
574 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
577 /* Wa_1604370585:icl (pre-prod)
578 * Formerly known as WaPushConstantDereferenceHoldDisable
580 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
581 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
582 PUSH_CONSTANT_DEREF_DISABLE);
584 /* WaForceEnableNonCoherent:icl
585 * This is not the same workaround as in early Gen9 platforms, where
586 * lacking this could cause system hangs, but coherency performance
587 * overhead is high and only a few compute workloads really need it
588 * (the register is whitelisted in hardware now, so UMDs can opt in
589 * for coherency if they have a good reason).
591 WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
593 /* Wa_2006611047:icl (pre-prod)
594 * Formerly known as WaDisableImprovedTdlClkGating
596 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
597 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
598 GEN11_TDL_CLOCK_GATING_FIX_DISABLE);
600 /* Wa_2006665173:icl (pre-prod) */
601 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
602 WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
603 GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
605 /* WaEnableFloatBlendOptimization:icl */
606 wa_write_masked_or(wal,
608 0, /* write-only, so skip validation */
609 _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
611 /* WaDisableGPGPUMidThreadPreemption:icl */
612 WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
613 GEN9_PREEMPT_GPGPU_LEVEL_MASK,
614 GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
616 /* allow headerless messages for preemptible GPGPU context */
617 WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE,
618 GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
620 /* Wa_1604278689:icl,ehl */
621 wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
622 wa_write_masked_or(wal, IVB_FBC_RT_BASE_UPPER,
623 0, /* write-only register; skip validation */
626 /* Wa_1406306137:icl,ehl */
627 wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
630 static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
631 struct i915_wa_list *wal)
644 WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
645 GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
647 /* WaDisableGPGPUMidThreadPreemption:gen12 */
648 WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
649 GEN9_PREEMPT_GPGPU_LEVEL_MASK,
650 GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
653 static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
654 struct i915_wa_list *wal)
656 gen12_ctx_workarounds_init(engine, wal);
659 * Wa_1604555607:tgl,rkl
661 * Note that the implementation of this workaround is further modified
662 * according to the FF_MODE2 guidance given by Wa_1608008084:gen12.
663 * FF_MODE2 register will return the wrong value when read. The default
664 * value for this register is zero for all fields and there are no bit
665 * masks. So instead of doing a RMW we should just write the GS Timer
666 * and TDS timer values for Wa_1604555607 and Wa_16011163337.
670 FF_MODE2_GS_TIMER_MASK | FF_MODE2_TDS_TIMER_MASK,
671 FF_MODE2_GS_TIMER_224 | FF_MODE2_TDS_TIMER_128,
676 __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
677 struct i915_wa_list *wal,
680 struct drm_i915_private *i915 = engine->i915;
682 if (engine->class != RENDER_CLASS)
685 wa_init_start(wal, name, engine->name);
687 if (IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915))
688 tgl_ctx_workarounds_init(engine, wal);
689 else if (IS_GEN(i915, 12))
690 gen12_ctx_workarounds_init(engine, wal);
691 else if (IS_GEN(i915, 11))
692 icl_ctx_workarounds_init(engine, wal);
693 else if (IS_CANNONLAKE(i915))
694 cnl_ctx_workarounds_init(engine, wal);
695 else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
696 cfl_ctx_workarounds_init(engine, wal);
697 else if (IS_GEMINILAKE(i915))
698 glk_ctx_workarounds_init(engine, wal);
699 else if (IS_KABYLAKE(i915))
700 kbl_ctx_workarounds_init(engine, wal);
701 else if (IS_BROXTON(i915))
702 bxt_ctx_workarounds_init(engine, wal);
703 else if (IS_SKYLAKE(i915))
704 skl_ctx_workarounds_init(engine, wal);
705 else if (IS_CHERRYVIEW(i915))
706 chv_ctx_workarounds_init(engine, wal);
707 else if (IS_BROADWELL(i915))
708 bdw_ctx_workarounds_init(engine, wal);
709 else if (IS_GEN(i915, 7))
710 gen7_ctx_workarounds_init(engine, wal);
711 else if (IS_GEN(i915, 6))
712 gen6_ctx_workarounds_init(engine, wal);
713 else if (INTEL_GEN(i915) < 8)
716 MISSING_CASE(INTEL_GEN(i915));
721 void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
723 __intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
726 int intel_engine_emit_ctx_wa(struct i915_request *rq)
728 struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
737 ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
741 cs = intel_ring_begin(rq, (wal->count * 2 + 2));
745 *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
746 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
747 *cs++ = i915_mmio_reg_offset(wa->reg);
752 intel_ring_advance(rq, cs);
754 ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
762 gen4_gt_workarounds_init(struct drm_i915_private *i915,
763 struct i915_wa_list *wal)
765 /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
766 wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
770 g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
772 gen4_gt_workarounds_init(i915, wal);
774 /* WaDisableRenderCachePipelinedFlush:g4x,ilk */
775 wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
779 ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
781 g4x_gt_workarounds_init(i915, wal);
783 wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
787 snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
789 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
792 _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);
794 /* WaDisable_RenderCache_OperationalFlush:snb */
795 wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
798 * BSpec recommends 8x4 when MSAA is used,
799 * however in practice 16x4 seems fastest.
801 * Note that PS/WM thread counts depend on the WIZ hashing
802 * disable bit, which we don't touch here, but it's good
803 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
807 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
808 GEN6_WIZ_HASHING_16x4);
810 wa_masked_dis(wal, CACHE_MODE_0, CM0_STC_EVICT_DISABLE_LRA_SNB);
814 /* WaStripsFansDisableFastClipPerformanceFix:snb */
815 _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
818 * "This bit must be set if 3DSTATE_CLIP clip mode is set
819 * to normal and 3DSTATE_SF number of SF output attributes
822 _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);
826 ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
828 /* WaDisableEarlyCull:ivb */
829 wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
831 /* WaDisablePSDDualDispatchEnable:ivb */
832 if (IS_IVB_GT1(i915))
834 GEN7_HALF_SLICE_CHICKEN1,
835 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
837 /* WaDisable_RenderCache_OperationalFlush:ivb */
838 wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
840 /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
842 GEN7_COMMON_SLICE_CHICKEN1,
843 GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
845 /* WaApplyL3ControlAndL3ChickenMode:ivb */
846 wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
847 wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
849 /* WaForceL3Serialization:ivb */
850 wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
853 * WaVSThreadDispatchOverride:ivb,vlv
855 * This actually overrides the dispatch
856 * mode for all thread types.
858 wa_write_masked_or(wal, GEN7_FF_THREAD_MODE,
860 GEN7_FF_TS_SCHED_HW |
861 GEN7_FF_VS_SCHED_HW |
862 GEN7_FF_DS_SCHED_HW);
864 if (0) { /* causes HiZ corruption on ivb:gt1 */
865 /* enable HiZ Raw Stall Optimization */
866 wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
869 /* WaDisable4x2SubspanOptimization:ivb */
870 wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
873 * BSpec recommends 8x4 when MSAA is used,
874 * however in practice 16x4 seems fastest.
876 * Note that PS/WM thread counts depend on the WIZ hashing
877 * disable bit, which we don't touch here, but it's good
878 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
880 wa_add(wal, GEN7_GT_MODE, 0,
881 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
882 GEN6_WIZ_HASHING_16x4);
886 vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
888 /* WaDisableEarlyCull:vlv */
889 wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
891 /* WaPsdDispatchEnable:vlv */
892 /* WaDisablePSDDualDispatchEnable:vlv */
894 GEN7_HALF_SLICE_CHICKEN1,
895 GEN7_MAX_PS_THREAD_DEP |
896 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
898 /* WaDisable_RenderCache_OperationalFlush:vlv */
899 wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
901 /* WaForceL3Serialization:vlv */
902 wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
905 * WaVSThreadDispatchOverride:ivb,vlv
907 * This actually overrides the dispatch
908 * mode for all thread types.
910 wa_write_masked_or(wal,
913 GEN7_FF_TS_SCHED_HW |
914 GEN7_FF_VS_SCHED_HW |
915 GEN7_FF_DS_SCHED_HW);
918 * BSpec says this must be set, even though
919 * WaDisable4x2SubspanOptimization isn't listed for VLV.
921 wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
924 * BSpec recommends 8x4 when MSAA is used,
925 * however in practice 16x4 seems fastest.
927 * Note that PS/WM thread counts depend on the WIZ hashing
928 * disable bit, which we don't touch here, but it's good
929 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
931 wa_add(wal, GEN7_GT_MODE, 0,
932 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
933 GEN6_WIZ_HASHING_16x4);
936 * WaIncreaseL3CreditsForVLVB0:vlv
937 * This is the hardware default actually.
939 wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
943 hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
945 /* L3 caching of data atomics doesn't work -- disable it. */
946 wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
950 _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
951 0 /* XXX does this reg exist? */);
953 /* WaVSRefCountFullforceMissDisable:hsw */
954 wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
958 /* WaDisable_RenderCache_OperationalFlush:hsw */
960 /* enable HiZ Raw Stall Optimization */
961 HIZ_RAW_STALL_OPT_DISABLE);
963 /* WaDisable4x2SubspanOptimization:hsw */
964 wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
967 * BSpec recommends 8x4 when MSAA is used,
968 * however in practice 16x4 seems fastest.
970 * Note that PS/WM thread counts depend on the WIZ hashing
971 * disable bit, which we don't touch here, but it's good
972 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
974 wa_add(wal, GEN7_GT_MODE, 0,
975 _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
976 GEN6_WIZ_HASHING_16x4);
978 /* WaSampleCChickenBitEnable:hsw */
979 wa_masked_en(wal, HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
983 gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
985 /* WaDisableKillLogic:bxt,skl,kbl */
986 if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
992 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
994 * Must match Display Engine. See
995 * WaCompressedResourceDisplayNewHashMode.
999 MMCD_PCLA | MMCD_HOTSPOT_EN);
1002 /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
1005 BDW_DISABLE_HDC_INVALIDATION);
1009 skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
1011 gen9_gt_workarounds_init(i915, wal);
1013 /* WaDisableGafsUnitClkGating:skl */
1016 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1018 /* WaInPlaceDecompressionHang:skl */
1019 if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
1021 GEN9_GAMT_ECO_REG_RW_IA,
1022 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1026 bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
1028 gen9_gt_workarounds_init(i915, wal);
1030 /* WaInPlaceDecompressionHang:bxt */
1032 GEN9_GAMT_ECO_REG_RW_IA,
1033 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1037 kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
1039 gen9_gt_workarounds_init(i915, wal);
1041 /* WaDisableDynamicCreditSharing:kbl */
1042 if (IS_KBL_GT_REVID(i915, 0, KBL_REVID_B0))
1045 GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
1047 /* WaDisableGafsUnitClkGating:kbl */
1050 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1052 /* WaInPlaceDecompressionHang:kbl */
1054 GEN9_GAMT_ECO_REG_RW_IA,
1055 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1059 glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
1061 gen9_gt_workarounds_init(i915, wal);
1065 cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
1067 gen9_gt_workarounds_init(i915, wal);
1069 /* WaDisableGafsUnitClkGating:cfl */
1072 GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
1074 /* WaInPlaceDecompressionHang:cfl */
1076 GEN9_GAMT_ECO_REG_RW_IA,
1077 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
/*
 * wa_init_mcr - record a GT workaround that programs a stable MCR
 * (multicast register steering) selector.
 *
 * Picks one enabled slice/subslice index that is simultaneously valid for
 * subslice-steered reads and for L3-bank-steered reads (0xB100-0xB3FF),
 * then emits a masked write of GEN8_MCR_SELECTOR into @wal.
 *
 * NOTE(review): this extraction is missing some interleaved lines (braces,
 * fallback branches); comments describe only what the visible lines show.
 */
1081 wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
1083 const struct sseu_dev_info *sseu = &i915->gt.info.sseu;
1084 unsigned int slice, subslice;
1085 u32 l3_en, mcr, mcr_mask;
/* This helper is only reachable on gen10+ platforms. */
1087 GEM_BUG_ON(INTEL_GEN(i915) < 10);
1090 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
1091 * L3Banks could be fused off in single slice scenario. If that is
1092 * the case, we might need to program MCR select to a valid L3Bank
1093 * by default, to make sure we correctly read certain registers
1094 * later on (in the range 0xB100 - 0xB3FF).
1096 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
1097 * Before any MMIO read into slice/subslice specific registers, MCR
1098 * packet control register needs to be programmed to point to any
1099 * enabled s/ss pair. Otherwise, incorrect values will be returned.
1100 * This means each subsequent MMIO read will be forwarded to a
1101 * specific s/ss combination, but this is OK since these registers
1102 * are consistent across s/ss in almost all cases. In the rare
1103 * occasions, such as INSTDONE, where this value is dependent
1104 * on s/ss combo, the read should be done with read_subslice_reg.
1106 * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both
1107 * to which subslice, or to which L3 bank, the respective mmio reads
1108 * will go, we have to find a common index which works for both
1111 * Case where we cannot find a common index fortunately should not
1112 * happen in production hardware, so we only emit a warning instead of
1113 * implementing something more complex that requires checking the range
1114 * of every MMIO read.
/* Derive the enabled L3 bank mask from the fuse register (gen10+ only). */
1117 if (INTEL_GEN(i915) >= 10 && is_power_of_2(sseu->slice_mask)) {
1119 intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) &
1122 drm_dbg(&i915->drm, "L3 fuse = %x\n", l3_fuse);
1123 l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse);
/* Prefer the highest enabled slice, then a subslice whose L3 bank is live. */
1128 slice = fls(sseu->slice_mask) - 1;
1129 subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
/* No common index: warn and fall back to any enabled L3 bank. */
1131 drm_warn(&i915->drm,
1132 "No common index found between subslice mask %x and L3 bank mask %x!\n",
1133 intel_sseu_get_subslices(sseu, slice), l3_en);
1134 subslice = fls(l3_en);
1135 drm_WARN_ON(&i915->drm, !subslice);
/* Gen11+ uses different slice/subslice field encodings than gen8-10. */
1139 if (INTEL_GEN(i915) >= 11) {
1140 mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
1141 mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
1143 mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
1144 mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
1147 drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr);
/* Persist the steering choice as a GT workaround (reapplied on reset). */
1149 wa_write_masked_or(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
/* GT workarounds for Cannonlake: MCR steering plus in-place decompression fix. */
1153 cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
1155 wa_init_mcr(i915, wal);
1157 /* WaInPlaceDecompressionHang:cnl */
1159 GEN9_GAMT_ECO_REG_RW_IA,
1160 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
/*
 * GT workarounds for Icelake. Each entry is appended to @wal and reapplied
 * whenever the GT registers lose state (reset, suspend/resume).
 */
1164 icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
1166 wa_init_mcr(i915, wal);
1168 /* WaInPlaceDecompressionHang:icl */
1170 GEN9_GAMT_ECO_REG_RW_IA,
1171 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
1173 /* WaModifyGamTlbPartitioning:icl */
1174 wa_write_masked_or(wal,
1175 GEN11_GACB_PERF_CTRL,
1176 GEN11_HASH_CTRL_MASK,
1177 GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
1179 /* Wa_1405766107:icl
1180 * Formerly known as WaCL2SFHalfMaxAlloc
1184 GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
1185 GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
1188 * Formerly known as WaDisCtxReload
1191 GEN8_GAMW_ECO_DEV_RW_IA,
1192 GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
/* Pre-production-only entries below are gated on early revision IDs. */
1194 /* Wa_1405779004:icl (pre-prod) */
1195 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
1197 SLICE_UNIT_LEVEL_CLKGATE,
1198 MSCUNIT_CLKGATE_DIS);
1200 /* Wa_1406838659:icl (pre-prod) */
1201 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
1203 INF_UNIT_LEVEL_CLKGATE,
1206 /* Wa_1406463099:icl
1207 * Formerly known as WaGamTlbPendError
1211 GAMT_CHKN_DISABLE_L3_COH_PIPE);
1213 /* Wa_1607087056:icl,ehl,jsl */
1214 if (IS_ICELAKE(i915) ||
1215 IS_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) {
1217 SLICE_UNIT_LEVEL_CLKGATE,
1218 L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
/* Common gen12 GT workarounds: currently only the MCR steering selection. */
1223 gen12_gt_workarounds_init(struct drm_i915_private *i915,
1224 struct i915_wa_list *wal)
1226 wa_init_mcr(i915, wal);
/* Tigerlake GT workarounds: gen12 common list plus TGL-specific entries. */
1230 tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
1232 gen12_gt_workarounds_init(i915, wal);
1234 /* Wa_1409420604:tgl */
1235 if (IS_TGL_UY_GT_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
1237 SUBSLICE_UNIT_LEVEL_CLKGATE2,
1238 CPSSUNIT_CLKGATE_DIS);
1240 /* Wa_1607087056:tgl also known as BUG:1409180338 */
1241 if (IS_TGL_UY_GT_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
1243 SLICE_UNIT_LEVEL_CLKGATE,
1244 L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
/*
 * Dispatch to the per-platform GT workaround builder, newest platform first.
 * Platforms at or below gen8 with no entry simply get an empty list;
 * anything newer and unhandled trips MISSING_CASE.
 */
1248 gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
1250 if (IS_TIGERLAKE(i915))
1251 tgl_gt_workarounds_init(i915, wal);
1252 else if (IS_GEN(i915, 12))
1253 gen12_gt_workarounds_init(i915, wal);
1254 else if (IS_GEN(i915, 11))
1255 icl_gt_workarounds_init(i915, wal);
1256 else if (IS_CANNONLAKE(i915))
1257 cnl_gt_workarounds_init(i915, wal);
1258 else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
1259 cfl_gt_workarounds_init(i915, wal);
1260 else if (IS_GEMINILAKE(i915))
1261 glk_gt_workarounds_init(i915, wal);
1262 else if (IS_KABYLAKE(i915))
1263 kbl_gt_workarounds_init(i915, wal);
1264 else if (IS_BROXTON(i915))
1265 bxt_gt_workarounds_init(i915, wal);
1266 else if (IS_SKYLAKE(i915))
1267 skl_gt_workarounds_init(i915, wal);
1268 else if (IS_HASWELL(i915))
1269 hsw_gt_workarounds_init(i915, wal);
1270 else if (IS_VALLEYVIEW(i915))
1271 vlv_gt_workarounds_init(i915, wal);
1272 else if (IS_IVYBRIDGE(i915))
1273 ivb_gt_workarounds_init(i915, wal);
1274 else if (IS_GEN(i915, 6))
1275 snb_gt_workarounds_init(i915, wal);
1276 else if (IS_GEN(i915, 5))
1277 ilk_gt_workarounds_init(i915, wal);
1278 else if (IS_G4X(i915))
1279 g4x_gt_workarounds_init(i915, wal);
1280 else if (IS_GEN(i915, 4))
1281 gen4_gt_workarounds_init(i915, wal);
1282 else if (INTEL_GEN(i915) <= 8)
1285 MISSING_CASE(INTEL_GEN(i915));
/* Build the device-global GT workaround list once at driver init. */
1288 void intel_gt_init_workarounds(struct drm_i915_private *i915)
1290 struct i915_wa_list *wal = &i915->gt_wa_list;
1292 wa_init_start(wal, "GT", "global");
1293 gt_init_workarounds(i915, wal);
1294 wa_init_finish(wal);
/*
 * Collect the union of forcewake domains needed to read-modify-write
 * every register in the workaround list, so a single get/put covers all.
 */
1297 static enum forcewake_domains
1298 wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
1300 enum forcewake_domains fw = 0;
1304 for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1305 fw |= intel_uncore_forcewake_for_reg(uncore,
/*
 * Check that the bits we care about (wa->read mask) of the current register
 * value match the expected workaround value; log an error naming the list
 * and the verification point (@from) on mismatch.
 */
1314 wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
1316 if ((cur ^ wa->set) & wa->read) {
1317 DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x)\n",
1318 name, from, i915_mmio_reg_offset(wa->reg),
1319 cur, cur & wa->read, wa->set);
/*
 * Write every workaround in @wal to the hardware. Forcewake is taken once
 * for all affected domains and the uncore lock is held across the whole
 * batch so the _fw (unlocked) register accessors are safe to use.
 */
1328 wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
1330 enum forcewake_domains fw;
1331 unsigned long flags;
1338 fw = wal_get_fw_for_rmw(uncore, wal);
1340 spin_lock_irqsave(&uncore->lock, flags);
1341 intel_uncore_forcewake_get__locked(uncore, fw);
1343 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
/* Entries with a clear-mask are RMW; plain entries are direct writes. */
1345 intel_uncore_rmw_fw(uncore, wa->reg, wa->clr, wa->set);
1347 intel_uncore_write_fw(uncore, wa->reg, wa->set);
/* In debug builds, read back immediately to catch lost workarounds. */
1348 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
1350 intel_uncore_read_fw(uncore, wa->reg),
1351 wal->name, "application");
1354 intel_uncore_forcewake_put__locked(uncore, fw);
1355 spin_unlock_irqrestore(&uncore->lock, flags);
/* Apply the device-global GT workaround list (e.g. after reset/resume). */
1358 void intel_gt_apply_workarounds(struct intel_gt *gt)
1360 wa_list_apply(gt->uncore, &gt->i915->gt_wa_list);
/* Re-read every register in @wal and verify the workaround values stuck. */
1363 static bool wa_list_verify(struct intel_uncore *uncore,
1364 const struct i915_wa_list *wal,
1371 for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1373 intel_uncore_read(uncore, wa->reg),
/* Verify the global GT workaround list; @from names the caller for logging. */
1379 bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
1381 return wa_list_verify(gt->uncore, &gt->i915->gt_wa_list, from);
/* Validate RING_FORCE_TO_NONPRIV flag bits before encoding them in a slot. */
1384 static inline bool is_nonpriv_flags_valid(u32 flags)
1386 /* Check only valid flag bits are set */
1387 if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
1390 /* NB: Only 3 out of 4 enum values are valid for access field */
1391 if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
1392 RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
/*
 * Add @reg to the engine whitelist with explicit access @flags. The flags
 * are encoded into the low bits of the register offset, as the hardware
 * NONPRIV slots expect. Bails (with a debug warning) if the fixed number
 * of slots is exhausted or the flags are malformed.
 */
1399 whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
1401 struct i915_wa wa = {
1405 if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1408 if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
1411 wa.reg.reg |= flags;
/* Convenience wrapper: whitelist @reg with full read/write access. */
1416 whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
1418 whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
/* Whitelist entries common to all gen9 platforms (skl/bxt/glk/kbl/cfl). */
1421 static void gen9_whitelist_build(struct i915_wa_list *w)
1423 /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
1424 whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
1426 /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1427 whitelist_reg(w, GEN8_CS_CHICKEN1);
1429 /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1430 whitelist_reg(w, GEN8_HDC_CHICKEN1);
1432 /* WaSendPushConstantsFromMMIO:skl,bxt */
1433 whitelist_reg(w, COMMON_SLICE_CHICKEN2);
/* Skylake whitelist: gen9 common entries plus one SKL-specific register. */
1436 static void skl_whitelist_build(struct intel_engine_cs *engine)
1438 struct i915_wa_list *w = &engine->whitelist;
/* Whitelisting only applies to the render engine on this platform. */
1440 if (engine->class != RENDER_CLASS)
1443 gen9_whitelist_build(w);
1445 /* WaDisableLSQCROPERFforOCL:skl */
1446 whitelist_reg(w, GEN8_L3SQCREG4);
/* Broxton whitelist: gen9 common entries only, render engine only. */
1449 static void bxt_whitelist_build(struct intel_engine_cs *engine)
1451 if (engine->class != RENDER_CLASS)
1454 gen9_whitelist_build(&engine->whitelist);
/* Kabylake whitelist: gen9 common entries plus one KBL-specific register. */
1457 static void kbl_whitelist_build(struct intel_engine_cs *engine)
1459 struct i915_wa_list *w = &engine->whitelist;
1461 if (engine->class != RENDER_CLASS)
1464 gen9_whitelist_build(w);
1466 /* WaDisableLSQCROPERFforOCL:kbl */
1467 whitelist_reg(w, GEN8_L3SQCREG4);
/* Geminilake whitelist: gen9 common entries plus the barrier-mode register. */
1470 static void glk_whitelist_build(struct intel_engine_cs *engine)
1472 struct i915_wa_list *w = &engine->whitelist;
1474 if (engine->class != RENDER_CLASS)
1477 gen9_whitelist_build(w);
1479 /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
1480 whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
/* Coffeelake whitelist: gen9 common entries plus read-only PS counters. */
1483 static void cfl_whitelist_build(struct intel_engine_cs *engine)
1485 struct i915_wa_list *w = &engine->whitelist;
1487 if (engine->class != RENDER_CLASS)
1490 gen9_whitelist_build(w)
1493 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
1495 * This covers 4 registers which are next to one another :
1496 * - PS_INVOCATION_COUNT
1497 * - PS_INVOCATION_COUNT_UDW
1499 * - PS_DEPTH_COUNT_UDW
/* One whitelist slot covers a range of 4 consecutive registers, read-only. */
1501 whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1502 RING_FORCE_TO_NONPRIV_ACCESS_RD |
1503 RING_FORCE_TO_NONPRIV_RANGE_4);
/*
 * Cometlake whitelist: non-render engines get read access to the context
 * timestamp register; all engines then inherit the Coffeelake entries.
 */
1506 static void cml_whitelist_build(struct intel_engine_cs *engine)
1508 struct i915_wa_list *w = &engine->whitelist;
1510 if (engine->class != RENDER_CLASS)
1511 whitelist_reg_ext(w,
1512 RING_CTX_TIMESTAMP(engine->mmio_base),
1513 RING_FORCE_TO_NONPRIV_ACCESS_RD);
1515 cfl_whitelist_build(engine);
/* Cannonlake whitelist: a single render-engine entry. */
1518 static void cnl_whitelist_build(struct intel_engine_cs *engine)
1520 struct i915_wa_list *w = &engine->whitelist;
1522 if (engine->class != RENDER_CLASS)
1525 /* WaEnablePreemptionGranularityControlByUMD:cnl */
1526 whitelist_reg(w, GEN8_CS_CHICKEN1);
/*
 * Icelake whitelist, dispatched on engine class: render gets UMD-tunable
 * chicken registers and read-only PS counters; video decode gets read-only
 * HuC status registers; every other class gets the context timestamp.
 * NOTE(review): the extraction drops the case labels before some branches.
 */
1529 static void icl_whitelist_build(struct intel_engine_cs *engine)
1531 struct i915_wa_list *w = &engine->whitelist;
1533 switch (engine->class) {
1535 /* WaAllowUMDToModifyHalfSliceChicken7:icl */
1536 whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
1538 /* WaAllowUMDToModifySamplerMode:icl */
1539 whitelist_reg(w, GEN10_SAMPLER_MODE);
1541 /* WaEnableStateCacheRedirectToCS:icl */
1542 whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1545 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
1547 * This covers 4 registers which are next to one another :
1548 * - PS_INVOCATION_COUNT
1549 * - PS_INVOCATION_COUNT_UDW
1551 * - PS_DEPTH_COUNT_UDW
1553 whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1554 RING_FORCE_TO_NONPRIV_ACCESS_RD |
1555 RING_FORCE_TO_NONPRIV_RANGE_4);
1558 case VIDEO_DECODE_CLASS:
/* Raw HuC register offsets, relative to the engine's mmio base. */
1559 /* hucStatusRegOffset */
1560 whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
1561 RING_FORCE_TO_NONPRIV_ACCESS_RD);
1562 /* hucUKernelHdrInfoRegOffset */
1563 whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
1564 RING_FORCE_TO_NONPRIV_ACCESS_RD);
1565 /* hucStatus2RegOffset */
1566 whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
1567 RING_FORCE_TO_NONPRIV_ACCESS_RD);
1568 whitelist_reg_ext(w,
1569 RING_CTX_TIMESTAMP(engine->mmio_base),
1570 RING_FORCE_TO_NONPRIV_ACCESS_RD);
/* Default case: expose only the context timestamp, read-only. */
1574 whitelist_reg_ext(w,
1575 RING_CTX_TIMESTAMP(engine->mmio_base),
1576 RING_FORCE_TO_NONPRIV_ACCESS_RD);
/*
 * Tigerlake whitelist, dispatched on engine class: render gets read-only
 * PS counters plus two UMD-managed workaround registers; other classes
 * get read access to the context timestamp.
 */
1581 static void tgl_whitelist_build(struct intel_engine_cs *engine)
1583 struct i915_wa_list *w = &engine->whitelist;
1585 switch (engine->class) {
1588 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
1591 * This covers 4 registers which are next to one another :
1592 * - PS_INVOCATION_COUNT
1593 * - PS_INVOCATION_COUNT_UDW
1595 * - PS_DEPTH_COUNT_UDW
1597 whitelist_reg_ext(w, PS_INVOCATION_COUNT,
1598 RING_FORCE_TO_NONPRIV_ACCESS_RD |
1599 RING_FORCE_TO_NONPRIV_RANGE_4);
1601 /* Wa_1808121037:tgl */
1602 whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
1604 /* Wa_1806527549:tgl */
1605 whitelist_reg(w, HIZ_CHICKEN);
1608 whitelist_reg_ext(w,
1609 RING_CTX_TIMESTAMP(engine->mmio_base),
1610 RING_FORCE_TO_NONPRIV_ACCESS_RD);
/*
 * Build the per-engine register whitelist by dispatching to the
 * platform-specific builder, newest platform first. Gen8 and older
 * have no whitelist; unhandled newer platforms trip MISSING_CASE.
 */
1615 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
1617 struct drm_i915_private *i915 = engine->i915;
1618 struct i915_wa_list *w = &engine->whitelist;
1620 wa_init_start(w, "whitelist", engine->name);
1622 if (IS_GEN(i915, 12))
1623 tgl_whitelist_build(engine);
1624 else if (IS_GEN(i915, 11))
1625 icl_whitelist_build(engine);
1626 else if (IS_CANNONLAKE(i915))
1627 cnl_whitelist_build(engine);
1628 else if (IS_COMETLAKE(i915))
1629 cml_whitelist_build(engine);
1630 else if (IS_COFFEELAKE(i915))
1631 cfl_whitelist_build(engine);
1632 else if (IS_GEMINILAKE(i915))
1633 glk_whitelist_build(engine);
1634 else if (IS_KABYLAKE(i915))
1635 kbl_whitelist_build(engine);
1636 else if (IS_BROXTON(i915))
1637 bxt_whitelist_build(engine);
1638 else if (IS_SKYLAKE(i915))
1639 skl_whitelist_build(engine);
1640 else if (INTEL_GEN(i915) <= 8)
1643 MISSING_CASE(INTEL_GEN(i915));
/*
 * Program the engine's whitelist into the hardware NONPRIV slots.
 * Each entry's encoded offset+flags goes into one slot; all remaining
 * slots are pointed at the harmless RING_NOPID register so stale or
 * garbage entries cannot expose privileged registers.
 */
1648 void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
1650 const struct i915_wa_list *wal = &engine->whitelist;
1651 struct intel_uncore *uncore = engine->uncore;
1652 const u32 base = engine->mmio_base;
1659 for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1660 intel_uncore_write(uncore,
1661 RING_FORCE_TO_NONPRIV(base, i),
1662 i915_mmio_reg_offset(wa->reg));
1664 /* And clear the rest just in case of garbage */
1665 for (; i < RING_MAX_NONPRIV_SLOTS; i++)
1666 intel_uncore_write(uncore,
1667 RING_FORCE_TO_NONPRIV(base, i),
1668 i915_mmio_reg_offset(RING_NOPID(base)));
/*
 * Render-engine workarounds, grouped by platform from newest to oldest.
 * These registers live in the context image, so the list is re-emitted on
 * context creation rather than on GT reset.
 * NOTE(review): the extraction is missing interleaved lines (register
 * arguments, braces); comments only describe what the visible lines show.
 */
1672 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1674 struct drm_i915_private *i915 = engine->i915;
/* --- Tigerlake A0-stepping-only workarounds --- */
1676 if (IS_TGL_UY_GT_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) {
1682 GEN9_CTX_PREEMPT_REG,
1683 GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
1687 * (see also Wa_1606682166:icl)
1691 GEN7_DISABLE_SAMPLER_PREFETCH);
1693 /* Wa_1408615072:tgl */
1694 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
1695 VSUNIT_CLKGATE_DIS_TGL);
/* --- Rocketlake / Tigerlake (all steppings) --- */
1698 if (IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
1699 /* Wa_1606931601:tgl,rkl */
1700 wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
1702 /* Wa_1409804808:tgl,rkl */
1703 wa_masked_en(wal, GEN7_ROW_CHICKEN2,
1704 GEN12_PUSH_CONST_DEREF_HOLD_DIS);
1708 * Wa_14010229206:tgl,rkl
1710 wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
1713 * Wa_1407928979:tgl A*
1714 * Wa_18011464164:tgl B0+
1715 * Wa_22010931296:tgl B0+
1716 * Wa_14010919138:rkl,tgl
1718 wa_write_or(wal, GEN7_FF_THREAD_MODE,
1719 GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
1724 * Wa_1607297627:tgl,rkl there are multiple entries for this
1725 * WA in the BSpec; some indicate this is an A0-only WA,
1726 * others indicate it applies to all steppings.
1729 GEN6_RC_SLEEP_PSMI_CONTROL,
1730 GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
1731 GEN8_RC_SEMA_IDLE_MSG_DISABLE);
1735 * Wa_22010271021:tgl,rkl
1738 GEN9_CS_DEBUG_MODE1,
1739 FF_DOP_CLOCK_GATE_DISABLE);
/* --- All gen12 platforms --- */
1742 if (IS_GEN(i915, 12)) {
1743 /* Wa_1406941453:gen12 */
/* --- Gen11 (Icelake family) --- */
1749 if (IS_GEN(i915, 11)) {
1750 /* This is not a Wa. Enable for better image quality */
1753 _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
1755 /* WaPipelineFlushCoherentLines:icl */
1758 GEN8_LQSC_FLUSH_COHERENT_LINES);
1762 * Formerly known as WaGAPZPriorityScheme
1766 GEN11_ARBITRATION_PRIO_ORDER_MASK);
1770 * Formerly known as WaL3BankAddressHashing
1772 wa_write_masked_or(wal,
1774 GEN11_HASH_CTRL_EXCL_MASK,
1775 GEN11_HASH_CTRL_EXCL_BIT0);
1776 wa_write_masked_or(wal,
1778 GEN11_BANK_HASH_ADDR_EXCL_MASK,
1779 GEN11_BANK_HASH_ADDR_EXCL_BIT0);
1783 * Formerly known as WaDisableCleanEvicts
1787 GEN11_LQSC_CLEAN_EVICT_DISABLE);
1789 /* WaForwardProgressSoftReset:icl */
1791 GEN10_SCRATCH_LNCF2,
1792 PMFLUSHDONE_LNICRSDROP |
1793 PMFLUSH_GAPL3UNBLOCK |
1794 PMFLUSHDONE_LNEBLK);
1796 /* Wa_1406609255:icl (pre-prod) */
1797 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
1800 GEN7_DISABLE_DEMAND_PREFETCH);
1802 /* Wa_1606682166:icl */
1805 GEN7_DISABLE_SAMPLER_PREFETCH);
1807 /* Wa_1409178092:icl */
1808 wa_write_masked_or(wal,
1810 GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
1813 /* WaEnable32PlaneMode:icl */
1814 wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
1815 GEN11_ENABLE_32_PLANE_MODE);
1818 * Wa_1408615072:icl,ehl (vsunit)
1819 * Wa_1407596294:icl,ehl (hsunit)
1821 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
1822 VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
1824 /* Wa_1407352427:icl,ehl */
1825 wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
1826 PSDUNIT_CLKGATE_DIS);
1828 /* Wa_1406680159:icl,ehl */
1830 SUBSLICE_UNIT_LEVEL_CLKGATE,
1831 GWUNIT_CLKGATE_DIS);
1834 * Wa_1408767742:icl[a2..forever],ehl[all]
1835 * Wa_1605460711:icl[a0..c0]
1838 GEN7_FF_THREAD_MODE,
1839 GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
1841 /* Wa_22010271021:ehl */
1842 if (IS_ELKHARTLAKE(i915))
1844 GEN9_CS_DEBUG_MODE1,
1845 FF_DOP_CLOCK_GATE_DISABLE);
/* --- Ranges spanning several generations --- */
1848 if (IS_GEN_RANGE(i915, 9, 12)) {
1849 /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */
1851 GEN7_FF_SLICE_CS_CHICKEN1,
1852 GEN9_FFSC_PERCTX_PREEMPT_CTRL);
1855 if (IS_SKYLAKE(i915) ||
1856 IS_KABYLAKE(i915) ||
1857 IS_COFFEELAKE(i915) ||
1858 IS_COMETLAKE(i915)) {
1859 /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
1862 GEN9_GAPS_TSV_CREDIT_DISABLE);
1865 if (IS_BROXTON(i915)) {
1866 /* WaDisablePooledEuLoadBalancingFix:bxt */
1868 FF_SLICE_CS_CHICKEN2,
1869 GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
1872 if (IS_GEN(i915, 9)) {
1873 /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
1875 GEN9_CSFE_CHICKEN1_RCS,
1876 GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
1878 /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
1881 GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
1883 /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
1884 if (IS_GEN9_LP(i915))
1885 wa_write_masked_or(wal,
1887 L3_PRIO_CREDITS_MASK,
1888 L3_GENERAL_PRIO_CREDITS(62) |
1889 L3_HIGH_PRIO_CREDITS(2));
1891 /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
1894 GEN8_LQSC_FLUSH_COHERENT_LINES);
/* --- Legacy platforms (gen7 and older) --- */
1897 if (IS_GEN(i915, 7))
1898 /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
1901 GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);
1903 if (IS_GEN_RANGE(i915, 6, 7))
1905 * We need to disable the AsyncFlip performance optimisations in
1906 * order to use MI_WAIT_FOR_EVENT within the CS. It should
1907 * already be programmed to '1' on all products.
1909 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
1913 ASYNC_FLIP_PERF_DISABLE);
1915 if (IS_GEN(i915, 6)) {
1917 * Required for the hardware to program scanline values for
1919 * WaEnableFlushTlbInvalidationMode:snb
1923 GFX_TLB_INVALIDATE_EXPLICIT);
1926 * From the Sandybridge PRM, volume 1 part 3, page 24:
1927 * "If this bit is set, STCunit will have LRA as replacement
1928 * policy. [...] This bit must be reset. LRA replacement
1929 * policy is not supported."
1933 CM0_STC_EVICT_DISABLE_LRA_SNB);
1936 if (IS_GEN_RANGE(i915, 4, 6))
1937 /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
1938 wa_add(wal, MI_MODE,
1939 0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
1940 /* XXX bit doesn't stick on Broadwater */
1941 IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH);
1943 if (IS_GEN(i915, 4))
1945 * Disable CONSTANT_BUFFER before it is loaded from the context
1946 * image. For as it is loaded, it is executed and the stored
1947 * address may no longer be valid, leading to a GPU hang.
1949 * This imposes the requirement that userspace reload their
1950 * CONSTANT_BUFFER on every batch, fortunately a requirement
1951 * they are already accustomed to from before contexts were
1954 wa_add(wal, ECOSKPD,
1955 0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
1956 0 /* XXX bit doesn't stick on Broadwater */);
/* Workarounds for non-render engines; currently only a KBL VECS fix. */
1960 xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1962 struct drm_i915_private *i915 = engine->i915;
1964 /* WaKBLVECSSemaphoreWaitPoll:kbl */
1965 if (IS_KBL_GT_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
1967 RING_SEMA_WAIT_POLL(engine->mmio_base),
/*
 * Route to the render- or non-render-specific workaround builder.
 * Pre-gen4 engines are only reachable from selftests and are skipped.
 */
1973 engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1975 if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 4))
1978 if (engine->class == RENDER_CLASS)
1979 rcs_engine_wa_init(engine, wal);
1981 xcs_engine_wa_init(engine, wal);
/* Build this engine's workaround list once; no-op on pre-gen4 hardware. */
1984 void intel_engine_init_workarounds(struct intel_engine_cs *engine)
1986 struct i915_wa_list *wal = &engine->wa_list;
1988 if (INTEL_GEN(engine->i915) < 4)
1991 wa_init_start(wal, "engine", engine->name);
1992 engine_init_workarounds(engine, wal);
1993 wa_init_finish(wal);
/* Write this engine's workaround list to the hardware. */
1996 void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
1998 wa_list_apply(engine->uncore, &engine->wa_list);
/*
 * Allocate and pin a scratch buffer large enough for @count u32 results,
 * used by the SRM-based workaround verification path. On error the backing
 * object is released and an ERR_PTR is returned; on success the caller
 * owns the pinned vma.
 */
2001 static struct i915_vma *
2002 create_scratch(struct i915_address_space *vm, int count)
2004 struct drm_i915_gem_object *obj;
2005 struct i915_vma *vma;
/* Round the result array up to whole pages for the internal object. */
2009 size = round_up(count * sizeof(u32), PAGE_SIZE);
2010 obj = i915_gem_object_create_internal(vm->i915, size);
2012 return ERR_CAST(obj);
2014 i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
2016 vma = i915_vma_instance(obj, vm, NULL);
/* GGTT bindings need PIN_GLOBAL; per-process address spaces use PIN_USER. */
2022 err = i915_vma_pin(vma, 0, 0,
2023 i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
2030 i915_gem_object_put(obj);
2031 return ERR_PTR(err);
/*
 * Register ranges (gen8+) whose reads are routed through the MCR selector
 * and therefore cannot be verified via command-streamer access.
 * The table is terminated by a zero .start sentinel.
 */
2034 static const struct {
2037 } mcr_ranges_gen8[] = {
2038 { .start = 0x5500, .end = 0x55ff },
2039 { .start = 0x7000, .end = 0x7fff },
2040 { .start = 0x9400, .end = 0x97ff },
2041 { .start = 0xb000, .end = 0xb3ff },
2042 { .start = 0xe000, .end = 0xe7ff },
/*
 * Return whether @offset falls in an MCR-steered range (gen8+), i.e. a
 * register whose CS-path read cannot be trusted for verification.
 */
2046 static bool mcr_range(struct drm_i915_private *i915, u32 offset)
/* Pre-gen8 hardware has no MCR steering; nothing to exclude. */
2050 if (INTEL_GEN(i915) < 8)
2054 * Registers in these ranges are affected by the MCR selector
2055 * which only controls CPU initiated MMIO. Routing does not
2056 * work for CS access so we cannot verify them on this path.
/* Linear scan of the sentinel-terminated range table. */
2058 for (i = 0; mcr_ranges_gen8[i].start; i++)
2059 if (offset >= mcr_ranges_gen8[i].start &&
2060 offset <= mcr_ranges_gen8[i].end)
/*
 * Emit MI_STORE_REGISTER_MEM commands into @rq to dump every verifiable
 * workaround register from @wal into the scratch @vma, skipping registers
 * in MCR-steered ranges (see mcr_range()). Results land at per-entry
 * offsets of sizeof(u32) * index in the scratch buffer.
 */
2067 wa_list_srm(struct i915_request *rq,
2068 const struct i915_wa_list *wal,
2069 struct i915_vma *vma)
2071 struct drm_i915_private *i915 = rq->engine->i915;
2072 unsigned int i, count = 0;
2073 const struct i915_wa *wa;
/* SRM opcode differs on gen8+; start from the legacy GGTT form. */
2076 srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
2077 if (INTEL_GEN(i915) >= 8)
/* First pass: count emittable registers to size the ring allocation. */
2080 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
2081 if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
2085 cs = intel_ring_begin(rq, 4 * count);
/* Second pass: emit one SRM per verifiable register. */
2089 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
2090 u32 offset = i915_mmio_reg_offset(wa->reg);
2092 if (mcr_range(i915, offset))
2097 *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
2100 intel_ring_advance(rq, cs);
/*
 * Verify @wal from the GPU's point of view: submit a request that SRMs
 * every verifiable register into a scratch buffer, wait for it, then
 * compare the captured values against the expected workaround values.
 *
 * Uses the ww-mutex transaction pattern: on -EDEADLK the locks are
 * backed off and the whole sequence retries.
 * NOTE(review): extraction drops some error-path lines (gotos/labels);
 * comments describe only the visible flow.
 */
2105 static int engine_wa_list_verify(struct intel_context *ce,
2106 const struct i915_wa_list * const wal,
2109 const struct i915_wa *wa;
2110 struct i915_request *rq;
2111 struct i915_vma *vma;
2112 struct i915_gem_ww_ctx ww;
/* One u32 result slot per workaround entry. */
2120 vma = create_scratch(&ce->engine->gt->ggtt->vm, wal->count);
2122 return PTR_ERR(vma);
/* Keep the engine awake for the duration of the submission. */
2124 intel_engine_pm_get(ce->engine);
2125 i915_gem_ww_ctx_init(&ww, false);
2127 err = i915_gem_object_lock(vma->obj, &ww);
2129 err = intel_context_pin_ww(ce, &ww);
2133 rq = i915_request_create(ce);
2139 err = i915_request_await_object(rq, vma->obj, true);
2141 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
2143 err = wa_list_srm(rq, wal, vma);
/* Hold a reference past i915_request_add() so we can wait on it. */
2145 i915_request_get(rq);
2147 i915_request_set_error_once(rq, err);
2148 i915_request_add(rq);
/* Bounded wait (200ms) for the SRM batch to complete. */
2153 if (i915_request_wait(rq, 0, HZ / 5) < 0) {
2158 results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
2159 if (IS_ERR(results)) {
2160 err = PTR_ERR(results);
/* Compare each captured value; MCR-steered registers were not dumped. */
2165 for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
2166 if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg)))
2169 if (!wa_verify(wa, results[i], wal->name, from))
2173 i915_gem_object_unpin_map(vma->obj);
2176 i915_request_put(rq);
2178 intel_context_unpin(ce);
/* ww-mutex deadlock: back off and retry the whole transaction. */
2180 if (err == -EDEADLK) {
2181 err = i915_gem_ww_ctx_backoff(&ww);
2185 i915_gem_ww_ctx_fini(&ww);
2186 intel_engine_pm_put(ce->engine);
2187 i915_vma_unpin(vma);
/* Verify this engine's workarounds via its kernel context submission path. */
2192 int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
2195 return engine_wa_list_verify(engine->kernel_context,
2200 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
2201 #include "selftest_workarounds.c"