2 * SPDX-License-Identifier: MIT
4 * Copyright © 2014-2018 Intel Corporation
8 #include "intel_workarounds.h"
11 * DOC: Hardware workarounds
13 * This file is intended as a central place to implement most [1]_ of the
14 * required workarounds for hardware to work as originally intended. They fall
15 * in five basic categories depending on how/when they are applied:
17 * - Workarounds that touch registers that are saved/restored to/from the HW
18 * context image. The list is emitted (via Load Register Immediate commands)
19 every time a new context is created.
20 * - GT workarounds. The list of these WAs is applied whenever these registers
21 * revert to default values (on GPU reset, suspend/resume [2]_, etc..).
22 * - Display workarounds. The list is applied during display clock-gating
24 * - Workarounds that whitelist a privileged register, so that UMDs can manage
25 them directly. This is just a special case of an MMIO workaround (as we
26 * write the list of these to/be-whitelisted registers to some special HW
28 * - Workaround batchbuffers, that get executed automatically by the hardware
29 * on every HW context restore.
31 * .. [1] Please notice that there are other WAs that, due to their nature,
32 * cannot be applied from a central place. Those are peppered around the rest
33 * of the code, as needed.
35 * .. [2] Technically, some registers are powercontext saved & restored, so they
36 * survive a suspend/resume. In practice, writing them again is not too
37 * costly and simplifies things. We can revisit this in the future.
42 * Keep things in this file ordered by WA type, as per the above (context, GT,
43 * display, register whitelist, batchbuffer). Then, inside each type, keep the
46 * - Infrastructure functions and macros
47 * - WAs per platform in standard gen/chrono order
48 * - Public functions to init or apply the given workaround type.
/*
 * wa_add() - record one context-workaround register write.
 * Keeps i915->workarounds.reg[] sorted by mmio offset: a binary search
 * looks for an existing entry for @reg; a duplicate whose mask is fully
 * covered by an existing entry is reported as "Discarding overwritten
 * w/a".  New entries are appended (bounded by I915_MAX_WA_REGS) and then
 * bubbled backwards into sorted position.
 * NOTE(review): several statements (loop headers, braces, field
 * assignments) are not visible in this extraction; code lines are left
 * byte-identical.
 */
51 static void wa_add(struct drm_i915_private *i915,
52 i915_reg_t reg, const u32 mask, const u32 val)
54 struct i915_workarounds *wa = &i915->workarounds;
55 unsigned int start = 0, end = wa->count;
56 unsigned int addr = i915_mmio_reg_offset(reg);
57 struct i915_wa_reg *r;
/* binary search over the sorted wa->reg[] table */
60 unsigned int mid = start + (end - start) / 2;
62 if (wa->reg[mid].addr < addr) {
64 } else if (wa->reg[mid].addr > addr) {
/* existing entry found for this offset: merge masks/values */
69 if ((mask & ~r->mask) == 0) {
70 DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
71 addr, r->mask, r->value);
/* table full: drop the workaround rather than overflow the array */
82 if (WARN_ON_ONCE(wa->count >= I915_MAX_WA_REGS)) {
83 DRM_ERROR("Dropping w/a for reg %04x (mask: %08x, value: %08x)\n",
/* append a fresh entry at the tail */
88 r = &wa->reg[wa->count++];
/* bubble the new entry back so the array stays sorted by addr */
93 while (r-- > wa->reg) {
94 GEM_BUG_ON(r[0].addr == r[1].addr);
95 if (r[1].addr > r[0].addr)
/*
 * Convenience wrappers around wa_add().  Note WA_REG implicitly uses a
 * local named 'dev_priv' at each expansion site.  The _MASKED_* helpers
 * build masked-register writes (upper 16 bits select which low bits take
 * effect), which is why the same 'mask' is passed twice.
 */
102 #define WA_REG(addr, mask, val) wa_add(dev_priv, (addr), (mask), (val))
/* Set bit(s) in a masked register. */
104 #define WA_SET_BIT_MASKED(addr, mask) \
105 WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
/* Clear bit(s) in a masked register. */
107 #define WA_CLR_BIT_MASKED(addr, mask) \
108 WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))
/* Write a multi-bit field value under 'mask' in a masked register. */
110 #define WA_SET_FIELD_MASKED(addr, mask, value) \
111 WA_REG(addr, (mask), _MASKED_FIELD(mask, value))
/* Context workarounds common to all gen8 platforms (Broadwell, Cherryview). */
113 static int gen8_ctx_workarounds_init(struct drm_i915_private *dev_priv)
115 WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
117 /* WaDisableAsyncFlipPerfMode:bdw,chv */
118 WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
120 /* WaDisablePartialInstShootdown:bdw,chv */
121 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
122 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
124 /* Use Force Non-Coherent whenever executing a 3D context. This is a
125 * workaround for a possible hang in the unlikely event a TLB
126 * invalidation occurs during a PSD flush.
128 /* WaForceEnableNonCoherent:bdw,chv */
129 /* WaHdcDisableFetchWhenMasked:bdw,chv */
130 WA_SET_BIT_MASKED(HDC_CHICKEN0,
131 HDC_DONOT_FETCH_MEM_WHEN_MASKED |
132 HDC_FORCE_NON_COHERENT);
134 /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
135 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
136 * polygons in the same 8x4 pixel/sample area to be processed without
137 * stalling waiting for the earlier ones to write to Hierarchical Z
140 * This optimization is off by default for BDW and CHV; turn it on.
142 WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
144 /* Wa4x4STCOptimizationDisable:bdw,chv */
145 WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
148 * BSpec recommends 8x4 when MSAA is used,
149 * however in practice 16x4 seems fastest.
151 * Note that PS/WM thread counts depend on the WIZ hashing
152 * disable bit, which we don't touch here, but it's good
153 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
155 WA_SET_FIELD_MASKED(GEN7_GT_MODE,
156 GEN6_WIZ_HASHING_MASK,
157 GEN6_WIZ_HASHING_16x4);
/* Broadwell-specific context workarounds, layered on the gen8 common set. */
162 static int bdw_ctx_workarounds_init(struct drm_i915_private *dev_priv)
166 ret = gen8_ctx_workarounds_init(dev_priv);
170 /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
171 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
173 /* WaDisableDopClockGating:bdw
175 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
176 * to disable EUTC clock gating.
178 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
179 DOP_CLOCK_GATING_DISABLE);
181 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
182 GEN8_SAMPLER_POWER_BYPASS_DIS);
184 WA_SET_BIT_MASKED(HDC_CHICKEN0,
185 /* WaForceContextSaveRestoreNonCoherent:bdw */
186 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
187 /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
/* extra bit only on BDW GT3 parts */
188 (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
/* Cherryview-specific context workarounds, layered on the gen8 common set. */
193 static int chv_ctx_workarounds_init(struct drm_i915_private *dev_priv)
197 ret = gen8_ctx_workarounds_init(dev_priv);
201 /* WaDisableThreadStallDopClockGating:chv */
202 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
204 /* Improve HiZ throughput on CHV. */
205 WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
/* Context workarounds common to all gen9 platforms (skl/bxt/kbl/glk/cfl). */
210 static int gen9_ctx_workarounds_init(struct drm_i915_private *dev_priv)
/* LLC-only parts (i.e. not the LP/Atom variants) */
212 if (HAS_LLC(dev_priv)) {
213 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
215 * Must match Display Engine. See
216 * WaCompressedResourceDisplayNewHashMode.
218 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
219 GEN9_PBE_COMPRESSED_HASH_SELECTION);
220 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
221 GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
224 /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
225 /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
226 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
227 FLOW_CONTROL_ENABLE |
228 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
230 /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
231 if (!IS_COFFEELAKE(dev_priv))
232 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
233 GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
235 /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
236 /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
237 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
238 GEN9_ENABLE_YV12_BUGFIX |
239 GEN9_ENABLE_GPGPU_PREEMPTION);
241 /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
242 /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
243 WA_SET_BIT_MASKED(CACHE_MODE_1,
244 GEN8_4x4_STC_OPTIMIZATION_DISABLE |
245 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
247 /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
248 WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
249 GEN9_CCS_TLB_PREFETCH_ENABLE);
251 /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
252 WA_SET_BIT_MASKED(HDC_CHICKEN0,
253 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
254 HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
256 /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
257 * both tied to WaForceContextSaveRestoreNonCoherent
258 * in some hsds for skl. We keep the tie for all gen9. The
259 * documentation is a bit hazy and so we want to get common behaviour,
260 * even though there is no clear evidence we would need both on kbl/bxt.
261 * This area has been source of system hangs so we play it safe
262 * and mimic the skl regardless of what bspec says.
264 * Use Force Non-Coherent whenever executing a 3D context. This
265 * is a workaround for a possible hang in the unlikely event
266 * a TLB invalidation occurs during a PSD flush.
269 /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
270 WA_SET_BIT_MASKED(HDC_CHICKEN0,
271 HDC_FORCE_NON_COHERENT);
273 /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
274 if (IS_SKYLAKE(dev_priv) ||
275 IS_KABYLAKE(dev_priv) ||
276 IS_COFFEELAKE(dev_priv))
277 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
278 GEN8_SAMPLER_POWER_BYPASS_DIS);
280 /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
281 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
284 * Supporting preemption with fine-granularity requires changes in the
285 * batch buffer programming. Since we can't break old userspace, we
286 * need to set our default preemption level to safe value. Userspace is
287 * still able to use more fine-grained preemption levels, since in
288 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
289 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
290 * not real HW workarounds, but merely a way to start using preemption
291 * while maintaining old contract with userspace.
294 /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
295 WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
297 /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
298 WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
299 GEN9_PREEMPT_GPGPU_LEVEL_MASK,
300 GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
302 /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
303 if (IS_GEN9_LP(dev_priv))
304 WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
/*
 * Tune Skylake IZ hashing per slice: for each of the three slices, only
 * slices with exactly one 7-EU subslice (power-of-two subslice_7eu mask)
 * are considered; GEN7_GT_MODE is then programmed with per-slice hashing
 * values derived from the position of that subslice.
 * NOTE(review): some statements (the vals[i] computation and early
 * returns) are not visible in this extraction; code left byte-identical.
 */
309 static int skl_tune_iz_hashing(struct drm_i915_private *dev_priv)
311 u8 vals[3] = { 0, 0, 0 };
/* iterate over the (up to) three slices */
314 for (i = 0; i < 3; i++) {
318 * Only consider slices where one, and only one, subslice has 7
321 if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
325 * subslice_7eu[i] != 0 (because of the check above) and
326 * ss_max == 4 (maximum number of subslices possible per slice)
/* index of the single 7-EU subslice in this slice */
330 ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
/* nothing to tune if no slice qualified */
334 if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
337 /* Tune IZ hashing. See intel_device_info_runtime_init() */
338 WA_SET_FIELD_MASKED(GEN7_GT_MODE,
339 GEN9_IZ_HASHING_MASK(2) |
340 GEN9_IZ_HASHING_MASK(1) |
341 GEN9_IZ_HASHING_MASK(0),
342 GEN9_IZ_HASHING(2, vals[2]) |
343 GEN9_IZ_HASHING(1, vals[1]) |
344 GEN9_IZ_HASHING(0, vals[0]));
/* Skylake context workarounds: gen9 common set plus IZ-hashing tuning. */
349 static int skl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
353 ret = gen9_ctx_workarounds_init(dev_priv);
357 return skl_tune_iz_hashing(dev_priv);
/* Broxton-specific context workarounds, layered on the gen9 common set. */
360 static int bxt_ctx_workarounds_init(struct drm_i915_private *dev_priv)
364 ret = gen9_ctx_workarounds_init(dev_priv);
368 /* WaDisableThreadStallDopClockGating:bxt */
369 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
370 STALL_DOP_GATING_DISABLE);
372 /* WaToEnableHwFixForPushConstHWBug:bxt */
373 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
374 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
/* Kabylake-specific context workarounds, layered on the gen9 common set. */
379 static int kbl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
383 ret = gen9_ctx_workarounds_init(dev_priv);
387 /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
388 if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
389 WA_SET_BIT_MASKED(HDC_CHICKEN0,
390 HDC_FENCE_DEST_SLM_DISABLE);
392 /* WaToEnableHwFixForPushConstHWBug:kbl */
/* only needed from stepping C0 onwards */
393 if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
394 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
395 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
397 /* WaDisableSbeCacheDispatchPortSharing:kbl */
398 WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
399 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
/* Geminilake-specific context workarounds, layered on the gen9 common set. */
404 static int glk_ctx_workarounds_init(struct drm_i915_private *dev_priv)
408 ret = gen9_ctx_workarounds_init(dev_priv);
412 /* WaToEnableHwFixForPushConstHWBug:glk */
413 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
414 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
/* Coffeelake-specific context workarounds, layered on the gen9 common set. */
419 static int cfl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
423 ret = gen9_ctx_workarounds_init(dev_priv);
427 /* WaToEnableHwFixForPushConstHWBug:cfl */
428 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
429 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
431 /* WaDisableSbeCacheDispatchPortSharing:cfl */
432 WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
433 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
/* Cannonlake context workarounds (gen10; no shared gen10 base set). */
438 static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
440 /* WaForceContextSaveRestoreNonCoherent:cnl */
441 WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
442 HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
444 /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
445 if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
446 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
448 /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
449 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
450 GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
452 /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
453 if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0))
454 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
455 GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
457 /* WaPushConstantDereferenceHoldDisable:cnl */
458 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
460 /* FtrEnableFastAnisoL1BankingFix:cnl */
461 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
463 /* WaDisable3DMidCmdPreemption:cnl */
464 WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
466 /* WaDisableGPGPUMidCmdPreemption:cnl */
467 WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
468 GEN9_PREEMPT_GPGPU_LEVEL_MASK,
469 GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
471 /* WaDisableEarlyEOT:cnl */
472 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
/* Icelake context workarounds (gen11). */
477 static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
479 /* Wa_1604370585:icl (pre-prod)
480 * Formerly known as WaPushConstantDereferenceHoldDisable
482 if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0))
483 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
484 PUSH_CONSTANT_DEREF_DISABLE);
486 /* WaForceEnableNonCoherent:icl
487 * This is not the same workaround as in early Gen9 platforms, where
488 * lacking this could cause system hangs, but coherency performance
489 * overhead is high and only a few compute workloads really need it
490 * (the register is whitelisted in hardware now, so UMDs can opt in
491 * for coherency if they have a good reason).
493 WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
495 /* Wa_2006611047:icl (pre-prod)
496 * Formerly known as WaDisableImprovedTdlClkGating
498 if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0))
499 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
500 GEN11_TDL_CLOCK_GATING_FIX_DISABLE);
502 /* WaEnableStateCacheRedirectToCS:icl */
503 WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN1,
504 GEN11_STATE_CACHE_REDIRECT_TO_CS);
506 /* Wa_2006665173:icl (pre-prod) */
507 if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0))
508 WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
509 GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
511 /* WaEnableFloatBlendOptimization:icl */
512 WA_SET_BIT_MASKED(GEN10_CACHE_MODE_SS, FLOAT_BLEND_OPTIMIZATION_ENABLE);
/*
 * intel_ctx_workarounds_init() - (re)build the per-context workaround list.
 * Resets workarounds.count and dispatches to the platform-specific init
 * routine; logs the resulting count.  Pre-gen8 platforms take the first
 * branch (body not visible in this extraction).
 */
517 int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv)
/* start from an empty list on every (re)init */
521 dev_priv->workarounds.count = 0;
523 if (INTEL_GEN(dev_priv) < 8)
525 else if (IS_BROADWELL(dev_priv))
526 err = bdw_ctx_workarounds_init(dev_priv);
527 else if (IS_CHERRYVIEW(dev_priv))
528 err = chv_ctx_workarounds_init(dev_priv);
529 else if (IS_SKYLAKE(dev_priv))
530 err = skl_ctx_workarounds_init(dev_priv);
531 else if (IS_BROXTON(dev_priv))
532 err = bxt_ctx_workarounds_init(dev_priv);
533 else if (IS_KABYLAKE(dev_priv))
534 err = kbl_ctx_workarounds_init(dev_priv);
535 else if (IS_GEMINILAKE(dev_priv))
536 err = glk_ctx_workarounds_init(dev_priv);
537 else if (IS_COFFEELAKE(dev_priv))
538 err = cfl_ctx_workarounds_init(dev_priv);
539 else if (IS_CANNONLAKE(dev_priv))
540 err = cnl_ctx_workarounds_init(dev_priv);
541 else if (IS_ICELAKE(dev_priv))
542 err = icl_ctx_workarounds_init(dev_priv);
/* unknown platform: flag it loudly */
544 MISSING_CASE(INTEL_GEN(dev_priv));
548 DRM_DEBUG_DRIVER("Number of context specific w/a: %d\n",
549 dev_priv->workarounds.count);
/*
 * intel_ctx_workarounds_emit() - emit the recorded workaround list into a
 * request's ring as one MI_LOAD_REGISTER_IMM, bracketed by barrier flushes.
 * Ring space requested: 2 dwords per register plus 2 for the LRI header
 * and padding.  NOTE(review): error-return paths are not visible in this
 * extraction; code left byte-identical.
 */
553 int intel_ctx_workarounds_emit(struct i915_request *rq)
555 struct i915_workarounds *w = &rq->i915->workarounds;
/* flush before touching the registers */
562 ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
566 cs = intel_ring_begin(rq, (w->count * 2 + 2));
570 *cs++ = MI_LOAD_REGISTER_IMM(w->count);
571 for (i = 0; i < w->count; i++) {
572 *cs++ = w->reg[i].addr;
573 *cs++ = w->reg[i].value;
577 intel_ring_advance(rq, cs);
/* flush again so the writes land before subsequent commands */
579 ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
/* Broadwell currently has no GT (non-context) workarounds to apply. */
586 static void bdw_gt_workarounds_apply(struct drm_i915_private *dev_priv)
/* Cherryview currently has no GT (non-context) workarounds to apply. */
590 static void chv_gt_workarounds_apply(struct drm_i915_private *dev_priv)
/* GT (MMIO) workarounds common to all gen9 platforms; applied directly
 * via I915_WRITE rather than recorded in the context list. */
594 static void gen9_gt_workarounds_apply(struct drm_i915_private *dev_priv)
596 /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
597 I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
598 _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));
600 /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
601 I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
602 GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
604 /* WaDisableKillLogic:bxt,skl,kbl */
605 if (!IS_COFFEELAKE(dev_priv))
606 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
/* LLC-only parts */
609 if (HAS_LLC(dev_priv)) {
610 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
612 * Must match Display Engine. See
613 * WaCompressedResourceDisplayNewHashMode.
615 I915_WRITE(MMCD_MISC_CTRL,
616 I915_READ(MMCD_MISC_CTRL) |
621 /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
622 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
623 BDW_DISABLE_HDC_INVALIDATION);
625 /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
626 if (IS_GEN9_LP(dev_priv)) {
627 u32 val = I915_READ(GEN8_L3SQCREG1);
/* rewrite L3 priority credits: 62 general / 2 high */
629 val &= ~L3_PRIO_CREDITS_MASK;
630 val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
631 I915_WRITE(GEN8_L3SQCREG1, val);
634 /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
635 I915_WRITE(GEN8_L3SQCREG4,
636 I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_FLUSH_COHERENT_LINES);
638 /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
639 I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
640 _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
/* Skylake GT workarounds, layered on the gen9 common set. */
643 static void skl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
645 gen9_gt_workarounds_apply(dev_priv);
647 /* WaEnableGapsTsvCreditFix:skl */
648 I915_WRITE(GEN8_GARBCNTL,
649 I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);
651 /* WaDisableGafsUnitClkGating:skl */
652 I915_WRITE(GEN7_UCGCTL4,
653 I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
655 /* WaInPlaceDecompressionHang:skl */
/* only needed from stepping H0 onwards */
656 if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
657 I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
658 I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
659 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
/* Broxton GT workarounds, layered on the gen9 common set. */
662 static void bxt_gt_workarounds_apply(struct drm_i915_private *dev_priv)
664 gen9_gt_workarounds_apply(dev_priv);
666 /* WaDisablePooledEuLoadBalancingFix:bxt */
667 I915_WRITE(FF_SLICE_CS_CHICKEN2,
668 _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));
670 /* WaInPlaceDecompressionHang:bxt */
671 I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
672 I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
673 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
/* Kabylake GT workarounds, layered on the gen9 common set. */
676 static void kbl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
678 gen9_gt_workarounds_apply(dev_priv);
680 /* WaEnableGapsTsvCreditFix:kbl */
681 I915_WRITE(GEN8_GARBCNTL,
682 I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);
684 /* WaDisableDynamicCreditSharing:kbl */
685 if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
686 I915_WRITE(GAMT_CHKN_BIT_REG,
687 I915_READ(GAMT_CHKN_BIT_REG) |
688 GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
690 /* WaDisableGafsUnitClkGating:kbl */
691 I915_WRITE(GEN7_UCGCTL4,
692 I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
694 /* WaInPlaceDecompressionHang:kbl */
695 I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
696 I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
697 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
699 /* WaKBLVECSSemaphoreWaitPoll:kbl */
700 if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_E0)) {
701 struct intel_engine_cs *engine;
/* applied to every engine except the render ring */
704 for_each_engine(engine, dev_priv, tmp) {
705 if (engine->id == RCS)
708 I915_WRITE(RING_SEMA_WAIT_POLL(engine->mmio_base), 1);
/* Geminilake GT workarounds: only the gen9 common set applies. */
713 static void glk_gt_workarounds_apply(struct drm_i915_private *dev_priv)
715 gen9_gt_workarounds_apply(dev_priv);
/* Coffeelake GT workarounds, layered on the gen9 common set. */
718 static void cfl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
720 gen9_gt_workarounds_apply(dev_priv);
722 /* WaEnableGapsTsvCreditFix:cfl */
723 I915_WRITE(GEN8_GARBCNTL,
724 I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);
726 /* WaDisableGafsUnitClkGating:cfl */
727 I915_WRITE(GEN7_UCGCTL4,
728 I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
730 /* WaInPlaceDecompressionHang:cfl */
731 I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
732 I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
733 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
/*
 * wa_init_mcr() - program the multicast/steering (MCR) selector so that
 * slice/subslice-specific MMIO reads are steered to an enabled s/ss pair
 * (WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl), and sanity-check
 * the L3-bank fusing against the enabled subslices
 * (WaProgramMgsrForL3BankSpecificMmioReads).
 * NOTE(review): some statements are not visible in this extraction
 * (closing braces, possibly additional checks); code left byte-identical.
 */
736 static void wa_init_mcr(struct drm_i915_private *dev_priv)
738 const struct sseu_dev_info *sseu = &(INTEL_INFO(dev_priv)->sseu);
740 u32 mcr_slice_subslice_mask;
743 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
744 * L3Banks could be fused off in single slice scenario. If that is
745 * the case, we might need to program MCR select to a valid L3Bank
746 * by default, to make sure we correctly read certain registers
747 * later on (in the range 0xB100 - 0xB3FF).
748 * This might be incompatible with
749 * WaProgramMgsrForCorrectSliceSpecificMmioReads.
750 * Fortunately, this should not happen in production hardware, so
751 * we only assert that this is the case (instead of implementing
752 * something more complex that requires checking the range of every
755 if (INTEL_GEN(dev_priv) >= 10 &&
756 is_power_of_2(sseu->slice_mask)) {
758 * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
759 * enabled subslice, no need to redirect MCR packet
761 u32 slice = fls(sseu->slice_mask);
762 u32 fuse3 = I915_READ(GEN10_MIRROR_FUSE3);
763 u8 ss_mask = sseu->subslice_mask[slice];
/* fold subslice pairs down to L3-bank granularity */
765 u8 enabled_mask = (ss_mask | ss_mask >>
766 GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
767 u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;
770 * Production silicon should have matched L3Bank and
/* warn if an enabled subslice maps onto a fused-off L3 bank */
773 WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
776 mcr = I915_READ(GEN8_MCR_SELECTOR);
/* slice/subslice field layout differs between gen11 and gen8-10 */
778 if (INTEL_GEN(dev_priv) >= 11)
779 mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
780 GEN11_MCR_SUBSLICE_MASK;
782 mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
783 GEN8_MCR_SUBSLICE_MASK;
785 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
786 * Before any MMIO read into slice/subslice specific registers, MCR
787 * packet control register needs to be programmed to point to any
788 * enabled s/ss pair. Otherwise, incorrect values will be returned.
789 * This means each subsequent MMIO read will be forwarded to an
790 * specific s/ss combination, but this is OK since these registers
791 * are consistent across s/ss in almost all cases. In the rare
792 * occasions, such as INSTDONE, where this value is dependent
793 * on s/ss combo, the read should be done with read_subslice_reg.
795 mcr &= ~mcr_slice_subslice_mask;
796 mcr |= intel_calculate_mcr_s_ss_select(dev_priv);
797 I915_WRITE(GEN8_MCR_SELECTOR, mcr);
/* Cannonlake GT workarounds (gen10): MCR steering plus CNL-specific WAs. */
800 static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
802 wa_init_mcr(dev_priv);
804 /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
805 if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
806 I915_WRITE(GAMT_CHKN_BIT_REG,
807 I915_READ(GAMT_CHKN_BIT_REG) |
808 GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
810 /* WaInPlaceDecompressionHang:cnl */
811 I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
812 I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
813 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
815 /* WaEnablePreemptionGranularityControlByUMD:cnl */
816 I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
817 _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
/* Icelake GT workarounds (gen11): MCR steering plus ICL-specific WAs.
 * NOTE(review): some bit-name lines appear missing in this extraction
 * (e.g. after lines 884 and 895); code left byte-identical. */
820 static void icl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
822 wa_init_mcr(dev_priv);
824 /* This is not an Wa. Enable for better image quality */
825 I915_WRITE(_3D_CHICKEN3,
826 _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));
828 /* WaInPlaceDecompressionHang:icl */
829 I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
830 GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
832 /* WaPipelineFlushCoherentLines:icl */
833 I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
834 GEN8_LQSC_FLUSH_COHERENT_LINES);
837 * Formerly known as WaGAPZPriorityScheme
839 I915_WRITE(GEN8_GARBCNTL, I915_READ(GEN8_GARBCNTL) |
840 GEN11_ARBITRATION_PRIO_ORDER_MASK);
843 * Formerly known as WaL3BankAddressHashing
845 I915_WRITE(GEN8_GARBCNTL,
846 (I915_READ(GEN8_GARBCNTL) & ~GEN11_HASH_CTRL_EXCL_MASK) |
847 GEN11_HASH_CTRL_EXCL_BIT0);
848 I915_WRITE(GEN11_GLBLINVL,
849 (I915_READ(GEN11_GLBLINVL) & ~GEN11_BANK_HASH_ADDR_EXCL_MASK) |
850 GEN11_BANK_HASH_ADDR_EXCL_BIT0);
852 /* WaModifyGamTlbPartitioning:icl */
853 I915_WRITE(GEN11_GACB_PERF_CTRL,
854 (I915_READ(GEN11_GACB_PERF_CTRL) & ~GEN11_HASH_CTRL_MASK) |
855 GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
858 * Formerly known as WaDisableCleanEvicts
860 I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
861 GEN11_LQSC_CLEAN_EVICT_DISABLE);
864 * Formerly known as WaCL2SFHalfMaxAlloc
866 I915_WRITE(GEN11_LSN_UNSLCVC, I915_READ(GEN11_LSN_UNSLCVC) |
867 GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
868 GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
871 * Formerly known as WaDisCtxReload
873 I915_WRITE(GAMW_ECO_DEV_RW_IA_REG, I915_READ(GAMW_ECO_DEV_RW_IA_REG) |
874 GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
876 /* Wa_1405779004:icl (pre-prod) */
877 if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0))
878 I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE,
879 I915_READ(SLICE_UNIT_LEVEL_CLKGATE) |
880 MSCUNIT_CLKGATE_DIS);
882 /* Wa_1406680159:icl */
883 I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE,
884 I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE) |
887 /* Wa_1604302699:icl */
888 I915_WRITE(GEN10_L3_CHICKEN_MODE_REGISTER,
889 I915_READ(GEN10_L3_CHICKEN_MODE_REGISTER) |
890 GEN11_I2M_WRITE_DISABLE);
892 /* Wa_1406838659:icl (pre-prod) */
893 if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0))
894 I915_WRITE(INF_UNIT_LEVEL_CLKGATE,
895 I915_READ(INF_UNIT_LEVEL_CLKGATE) |
898 /* WaForwardProgressSoftReset:icl */
899 I915_WRITE(GEN10_SCRATCH_LNCF2,
900 I915_READ(GEN10_SCRATCH_LNCF2) |
901 PMFLUSHDONE_LNICRSDROP |
902 PMFLUSH_GAPL3UNBLOCK |
906 * Formerly known as WaGamTlbPendError
908 I915_WRITE(GAMT_CHKN_BIT_REG,
909 I915_READ(GAMT_CHKN_BIT_REG) |
910 GAMT_CHKN_DISABLE_L3_COH_PIPE);
/*
 * intel_gt_workarounds_apply() - apply GT (non-context) workarounds via
 * direct MMIO.  Dispatches to the platform-specific routine; must be
 * re-run whenever the registers revert to defaults (reset, resume).
 */
913 void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv)
915 if (INTEL_GEN(dev_priv) < 8)
917 else if (IS_BROADWELL(dev_priv))
918 bdw_gt_workarounds_apply(dev_priv);
919 else if (IS_CHERRYVIEW(dev_priv))
920 chv_gt_workarounds_apply(dev_priv);
921 else if (IS_SKYLAKE(dev_priv))
922 skl_gt_workarounds_apply(dev_priv);
923 else if (IS_BROXTON(dev_priv))
924 bxt_gt_workarounds_apply(dev_priv);
925 else if (IS_KABYLAKE(dev_priv))
926 kbl_gt_workarounds_apply(dev_priv);
927 else if (IS_GEMINILAKE(dev_priv))
928 glk_gt_workarounds_apply(dev_priv);
929 else if (IS_COFFEELAKE(dev_priv))
930 cfl_gt_workarounds_apply(dev_priv);
931 else if (IS_CANNONLAKE(dev_priv))
932 cnl_gt_workarounds_apply(dev_priv);
933 else if (IS_ICELAKE(dev_priv))
934 icl_gt_workarounds_apply(dev_priv);
/* unknown platform: flag it loudly */
936 MISSING_CASE(INTEL_GEN(dev_priv));
/* Member of the whitelist struct (declaration header not visible in this
 * extraction): registers to program into the RING_FORCE_TO_NONPRIV slots;
 * presumably accompanied by 'count' and 'nopid' members referenced below
 * — TODO confirm against the full file. */
940 i915_reg_t reg[RING_MAX_NONPRIV_SLOTS];
/* Append @reg to the whitelist, warning and dropping it if all
 * RING_MAX_NONPRIV_SLOTS entries are already used. */
945 static void whitelist_reg(struct whitelist *w, i915_reg_t reg)
947 if (GEM_WARN_ON(w->count >= RING_MAX_NONPRIV_SLOTS))
950 w->reg[w->count++] = reg;
/* Broadwell whitelists no registers. */
953 static void bdw_whitelist_build(struct whitelist *w)
/* Cherryview whitelists no registers. */
957 static void chv_whitelist_build(struct whitelist *w)
/* Registers whitelisted for UMD access on all gen9 platforms. */
961 static void gen9_whitelist_build(struct whitelist *w)
963 /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
964 whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
966 /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
967 whitelist_reg(w, GEN8_CS_CHICKEN1);
969 /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
970 whitelist_reg(w, GEN8_HDC_CHICKEN1);
/* Skylake whitelist: gen9 common set plus L3SQCREG4. */
973 static void skl_whitelist_build(struct whitelist *w)
975 gen9_whitelist_build(w);
977 /* WaDisableLSQCROPERFforOCL:skl */
978 whitelist_reg(w, GEN8_L3SQCREG4);
/* Broxton whitelist: only the gen9 common set. */
981 static void bxt_whitelist_build(struct whitelist *w)
983 gen9_whitelist_build(w);
/* Kabylake whitelist: gen9 common set plus L3SQCREG4. */
986 static void kbl_whitelist_build(struct whitelist *w)
988 gen9_whitelist_build(w);
990 /* WaDisableLSQCROPERFforOCL:kbl */
991 whitelist_reg(w, GEN8_L3SQCREG4);
/* Geminilake whitelist: gen9 common set plus slice-common ECO chicken. */
994 static void glk_whitelist_build(struct whitelist *w)
996 gen9_whitelist_build(w);
998 /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
999 whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
/* Coffeelake whitelist: only the gen9 common set. */
1002 static void cfl_whitelist_build(struct whitelist *w)
1004 gen9_whitelist_build(w);
/* Cannonlake whitelist: a single register, independent of the gen9 set. */
1007 static void cnl_whitelist_build(struct whitelist *w)
1009 /* WaEnablePreemptionGranularityControlByUMD:cnl */
1010 whitelist_reg(w, GEN8_CS_CHICKEN1);
/* Icelake whitelists no registers yet. */
1013 static void icl_whitelist_build(struct whitelist *w)
/*
 * whitelist_build() - populate @w for @engine's platform and return it.
 * Only valid for the render engine (asserted below).  Records the NOPID
 * register offset used later to scrub unused whitelist slots.
 */
1017 static struct whitelist *whitelist_build(struct intel_engine_cs *engine,
1018 struct whitelist *w)
1020 struct drm_i915_private *i915 = engine->i915;
/* whitelisting is only implemented for the render ring */
1022 GEM_BUG_ON(engine->id != RCS);
/* harmless register used to fill the remaining NONPRIV slots */
1025 w->nopid = i915_mmio_reg_offset(RING_NOPID(engine->mmio_base));
1027 if (INTEL_GEN(i915) < 8)
1029 else if (IS_BROADWELL(i915))
1030 bdw_whitelist_build(w);
1031 else if (IS_CHERRYVIEW(i915))
1032 chv_whitelist_build(w);
1033 else if (IS_SKYLAKE(i915))
1034 skl_whitelist_build(w);
1035 else if (IS_BROXTON(i915))
1036 bxt_whitelist_build(w);
1037 else if (IS_KABYLAKE(i915))
1038 kbl_whitelist_build(w);
1039 else if (IS_GEMINILAKE(i915))
1040 glk_whitelist_build(w);
1041 else if (IS_COFFEELAKE(i915))
1042 cfl_whitelist_build(w);
1043 else if (IS_CANNONLAKE(i915))
1044 cnl_whitelist_build(w);
1045 else if (IS_ICELAKE(i915))
1046 icl_whitelist_build(w);
/* unknown platform: flag it loudly */
1048 MISSING_CASE(INTEL_GEN(i915));
/*
 * whitelist_apply() - write the whitelist into the engine's
 * RING_FORCE_TO_NONPRIV slots under forcewake, then fill every remaining
 * slot with the NOPID offset so stale entries cannot linger.
 */
1053 static void whitelist_apply(struct intel_engine_cs *engine,
1054 const struct whitelist *w)
1056 struct drm_i915_private *dev_priv = engine->i915;
1057 const u32 base = engine->mmio_base;
/* hold forcewake across the raw _FW register writes */
1063 intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
1065 for (i = 0; i < w->count; i++)
1066 I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i),
1067 i915_mmio_reg_offset(w->reg[i]));
1069 /* And clear the rest just in case of garbage */
1070 for (; i < RING_MAX_NONPRIV_SLOTS; i++)
1071 I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i), w->nopid);
1073 intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
/* Build the platform whitelist on the stack and program it into @engine. */
1076 void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine)
1080 whitelist_apply(engine, whitelist_build(engine, &w));
1083 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1084 #include "selftests/intel_workarounds.c"