drm/i915/icl: restore WaEnableFloatBlendOptimization
[sfrench/cifs-2.6.git] / drivers / gpu / drm / i915 / intel_workarounds.c
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2014-2018 Intel Corporation
5  */
6
7 #include "i915_drv.h"
8 #include "intel_workarounds.h"
9
10 /**
11  * DOC: Hardware workarounds
12  *
13  * This file is intended as a central place to implement most [1]_ of the
14  * required workarounds for hardware to work as originally intended. They fall
15  * in five basic categories depending on how/when they are applied:
16  *
17  * - Workarounds that touch registers that are saved/restored to/from the HW
18  *   context image. The list is emitted (via Load Register Immediate commands)
19  *   every time a new context is created.
20  * - GT workarounds. The list of these WAs is applied whenever these registers
21  *   revert to default values (on GPU reset, suspend/resume [2]_, etc..).
22  * - Display workarounds. The list is applied during display clock-gating
23  *   initialization.
24  * - Workarounds that whitelist a privileged register, so that UMDs can manage
25  *   them directly. This is just a special case of a MMIO workaround (as we
26  *   write the list of these to-be-whitelisted registers to some special HW
27  *   registers).
28  * - Workaround batchbuffers, that get executed automatically by the hardware
29  *   on every HW context restore.
30  *
31  * .. [1] Please notice that there are other WAs that, due to their nature,
32  *    cannot be applied from a central place. Those are peppered around the rest
33  *    of the code, as needed.
34  *
35  * .. [2] Technically, some registers are powercontext saved & restored, so they
36  *    survive a suspend/resume. In practice, writing them again is not too
37  *    costly and simplifies things. We can revisit this in the future.
38  *
39  * Layout
40  * ''''''
41  *
42  * Keep things in this file ordered by WA type, as per the above (context, GT,
43  * display, register whitelist, batchbuffer). Then, inside each type, keep the
44  * following order:
45  *
46  * - Infrastructure functions and macros
47  * - WAs per platform in standard gen/chrono order
48  * - Public functions to init or apply the given workaround type.
49  */
50
51 static void wa_init_start(struct i915_wa_list *wal, const char *name)
52 {
53         wal->name = name;
54 }
55
56 #define WA_LIST_CHUNK (1 << 4)
57
58 static void wa_init_finish(struct i915_wa_list *wal)
59 {
60         /* Trim unused entries. */
61         if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
62                 struct i915_wa *list = kmemdup(wal->list,
63                                                wal->count * sizeof(*list),
64                                                GFP_KERNEL);
65
66                 if (list) {
67                         kfree(wal->list);
68                         wal->list = list;
69                 }
70         }
71
72         if (!wal->count)
73                 return;
74
75         DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
76                          wal->wa_count, wal->name);
77 }
78
79 static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
80 {
81         unsigned int addr = i915_mmio_reg_offset(wa->reg);
82         unsigned int start = 0, end = wal->count;
83         const unsigned int grow = WA_LIST_CHUNK;
84         struct i915_wa *wa_;
85
86         GEM_BUG_ON(!is_power_of_2(grow));
87
88         if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
89                 struct i915_wa *list;
90
91                 list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
92                                      GFP_KERNEL);
93                 if (!list) {
94                         DRM_ERROR("No space for workaround init!\n");
95                         return;
96                 }
97
98                 if (wal->list)
99                         memcpy(list, wal->list, sizeof(*wa) * wal->count);
100
101                 wal->list = list;
102         }
103
104         while (start < end) {
105                 unsigned int mid = start + (end - start) / 2;
106
107                 if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
108                         start = mid + 1;
109                 } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
110                         end = mid;
111                 } else {
112                         wa_ = &wal->list[mid];
113
114                         if ((wa->mask & ~wa_->mask) == 0) {
115                                 DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
116                                           i915_mmio_reg_offset(wa_->reg),
117                                           wa_->mask, wa_->val);
118
119                                 wa_->val &= ~wa->mask;
120                         }
121
122                         wal->wa_count++;
123                         wa_->val |= wa->val;
124                         wa_->mask |= wa->mask;
125                         return;
126                 }
127         }
128
129         wal->wa_count++;
130         wa_ = &wal->list[wal->count++];
131         *wa_ = *wa;
132
133         while (wa_-- > wal->list) {
134                 GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
135                            i915_mmio_reg_offset(wa_[1].reg));
136                 if (i915_mmio_reg_offset(wa_[1].reg) >
137                     i915_mmio_reg_offset(wa_[0].reg))
138                         break;
139
140                 swap(wa_[1], wa_[0]);
141         }
142 }
143
144 static void
145 wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
146                    u32 val)
147 {
148         struct i915_wa wa = {
149                 .reg = reg,
150                 .mask = mask,
151                 .val = val
152         };
153
154         _wa_add(wal, &wa);
155 }
156
157 static void
158 wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
159 {
160         wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
161 }
162
163 static void
164 wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
165 {
166         wa_write_masked_or(wal, reg, ~0, val);
167 }
168
169 static void
170 wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
171 {
172         wa_write_masked_or(wal, reg, val, val);
173 }
174
/*
 * Shorthands for the per-platform context workaround lists below.
 *
 * All three expand to wa_write_masked_or() on a variable named "wal", which
 * must therefore be in scope at the point of use.
 */

/* Enable the bits in @mask, value encoded with _MASKED_BIT_ENABLE(). */
#define WA_SET_BIT_MASKED(addr, mask) \
        wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))

/* Disable the bits in @mask, value encoded with _MASKED_BIT_DISABLE(). */
#define WA_CLR_BIT_MASKED(addr, mask) \
        wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))

/* Program the field selected by @mask to @value via _MASKED_FIELD(). */
#define WA_SET_FIELD_MASKED(addr, mask, value) \
        wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))
183
184 static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine)
185 {
186         struct i915_wa_list *wal = &engine->ctx_wa_list;
187
188         WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
189
190         /* WaDisableAsyncFlipPerfMode:bdw,chv */
191         WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
192
193         /* WaDisablePartialInstShootdown:bdw,chv */
194         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
195                           PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
196
197         /* Use Force Non-Coherent whenever executing a 3D context. This is a
198          * workaround for for a possible hang in the unlikely event a TLB
199          * invalidation occurs during a PSD flush.
200          */
201         /* WaForceEnableNonCoherent:bdw,chv */
202         /* WaHdcDisableFetchWhenMasked:bdw,chv */
203         WA_SET_BIT_MASKED(HDC_CHICKEN0,
204                           HDC_DONOT_FETCH_MEM_WHEN_MASKED |
205                           HDC_FORCE_NON_COHERENT);
206
207         /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
208          * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
209          *  polygons in the same 8x4 pixel/sample area to be processed without
210          *  stalling waiting for the earlier ones to write to Hierarchical Z
211          *  buffer."
212          *
213          * This optimization is off by default for BDW and CHV; turn it on.
214          */
215         WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
216
217         /* Wa4x4STCOptimizationDisable:bdw,chv */
218         WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
219
220         /*
221          * BSpec recommends 8x4 when MSAA is used,
222          * however in practice 16x4 seems fastest.
223          *
224          * Note that PS/WM thread counts depend on the WIZ hashing
225          * disable bit, which we don't touch here, but it's good
226          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
227          */
228         WA_SET_FIELD_MASKED(GEN7_GT_MODE,
229                             GEN6_WIZ_HASHING_MASK,
230                             GEN6_WIZ_HASHING_16x4);
231 }
232
233 static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine)
234 {
235         struct drm_i915_private *i915 = engine->i915;
236         struct i915_wa_list *wal = &engine->ctx_wa_list;
237
238         gen8_ctx_workarounds_init(engine);
239
240         /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
241         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
242
243         /* WaDisableDopClockGating:bdw
244          *
245          * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
246          * to disable EUTC clock gating.
247          */
248         WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
249                           DOP_CLOCK_GATING_DISABLE);
250
251         WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
252                           GEN8_SAMPLER_POWER_BYPASS_DIS);
253
254         WA_SET_BIT_MASKED(HDC_CHICKEN0,
255                           /* WaForceContextSaveRestoreNonCoherent:bdw */
256                           HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
257                           /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
258                           (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
259 }
260
261 static void chv_ctx_workarounds_init(struct intel_engine_cs *engine)
262 {
263         struct i915_wa_list *wal = &engine->ctx_wa_list;
264
265         gen8_ctx_workarounds_init(engine);
266
267         /* WaDisableThreadStallDopClockGating:chv */
268         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
269
270         /* Improve HiZ throughput on CHV. */
271         WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
272 }
273
274 static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine)
275 {
276         struct drm_i915_private *i915 = engine->i915;
277         struct i915_wa_list *wal = &engine->ctx_wa_list;
278
279         if (HAS_LLC(i915)) {
280                 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
281                  *
282                  * Must match Display Engine. See
283                  * WaCompressedResourceDisplayNewHashMode.
284                  */
285                 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
286                                   GEN9_PBE_COMPRESSED_HASH_SELECTION);
287                 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
288                                   GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
289         }
290
291         /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
292         /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
293         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
294                           FLOW_CONTROL_ENABLE |
295                           PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
296
297         /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
298         if (!IS_COFFEELAKE(i915))
299                 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
300                                   GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
301
302         /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
303         /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
304         WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
305                           GEN9_ENABLE_YV12_BUGFIX |
306                           GEN9_ENABLE_GPGPU_PREEMPTION);
307
308         /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
309         /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
310         WA_SET_BIT_MASKED(CACHE_MODE_1,
311                           GEN8_4x4_STC_OPTIMIZATION_DISABLE |
312                           GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
313
314         /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
315         WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
316                           GEN9_CCS_TLB_PREFETCH_ENABLE);
317
318         /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
319         WA_SET_BIT_MASKED(HDC_CHICKEN0,
320                           HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
321                           HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
322
323         /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
324          * both tied to WaForceContextSaveRestoreNonCoherent
325          * in some hsds for skl. We keep the tie for all gen9. The
326          * documentation is a bit hazy and so we want to get common behaviour,
327          * even though there is no clear evidence we would need both on kbl/bxt.
328          * This area has been source of system hangs so we play it safe
329          * and mimic the skl regardless of what bspec says.
330          *
331          * Use Force Non-Coherent whenever executing a 3D context. This
332          * is a workaround for a possible hang in the unlikely event
333          * a TLB invalidation occurs during a PSD flush.
334          */
335
336         /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
337         WA_SET_BIT_MASKED(HDC_CHICKEN0,
338                           HDC_FORCE_NON_COHERENT);
339
340         /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
341         if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
342                 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
343                                   GEN8_SAMPLER_POWER_BYPASS_DIS);
344
345         /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
346         WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
347
348         /*
349          * Supporting preemption with fine-granularity requires changes in the
350          * batch buffer programming. Since we can't break old userspace, we
351          * need to set our default preemption level to safe value. Userspace is
352          * still able to use more fine-grained preemption levels, since in
353          * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
354          * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
355          * not real HW workarounds, but merely a way to start using preemption
356          * while maintaining old contract with userspace.
357          */
358
359         /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
360         WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
361
362         /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
363         WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
364                             GEN9_PREEMPT_GPGPU_LEVEL_MASK,
365                             GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
366
367         /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
368         if (IS_GEN9_LP(i915))
369                 WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
370 }
371
372 static void skl_tune_iz_hashing(struct intel_engine_cs *engine)
373 {
374         struct drm_i915_private *i915 = engine->i915;
375         struct i915_wa_list *wal = &engine->ctx_wa_list;
376         u8 vals[3] = { 0, 0, 0 };
377         unsigned int i;
378
379         for (i = 0; i < 3; i++) {
380                 u8 ss;
381
382                 /*
383                  * Only consider slices where one, and only one, subslice has 7
384                  * EUs
385                  */
386                 if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
387                         continue;
388
389                 /*
390                  * subslice_7eu[i] != 0 (because of the check above) and
391                  * ss_max == 4 (maximum number of subslices possible per slice)
392                  *
393                  * ->    0 <= ss <= 3;
394                  */
395                 ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
396                 vals[i] = 3 - ss;
397         }
398
399         if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
400                 return;
401
402         /* Tune IZ hashing. See intel_device_info_runtime_init() */
403         WA_SET_FIELD_MASKED(GEN7_GT_MODE,
404                             GEN9_IZ_HASHING_MASK(2) |
405                             GEN9_IZ_HASHING_MASK(1) |
406                             GEN9_IZ_HASHING_MASK(0),
407                             GEN9_IZ_HASHING(2, vals[2]) |
408                             GEN9_IZ_HASHING(1, vals[1]) |
409                             GEN9_IZ_HASHING(0, vals[0]));
410 }
411
/* Skylake context workarounds: the gen9 common set, then IZ hashing tuning. */
static void skl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        /* The common list must be built first; the tuning is added on top. */
        gen9_ctx_workarounds_init(engine);

        skl_tune_iz_hashing(engine);
}
417
418 static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine)
419 {
420         struct i915_wa_list *wal = &engine->ctx_wa_list;
421
422         gen9_ctx_workarounds_init(engine);
423
424         /* WaDisableThreadStallDopClockGating:bxt */
425         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
426                           STALL_DOP_GATING_DISABLE);
427
428         /* WaToEnableHwFixForPushConstHWBug:bxt */
429         WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
430                           GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
431 }
432
433 static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine)
434 {
435         struct drm_i915_private *i915 = engine->i915;
436         struct i915_wa_list *wal = &engine->ctx_wa_list;
437
438         gen9_ctx_workarounds_init(engine);
439
440         /* WaToEnableHwFixForPushConstHWBug:kbl */
441         if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
442                 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
443                                   GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
444
445         /* WaDisableSbeCacheDispatchPortSharing:kbl */
446         WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
447                           GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
448 }
449
450 static void glk_ctx_workarounds_init(struct intel_engine_cs *engine)
451 {
452         struct i915_wa_list *wal = &engine->ctx_wa_list;
453
454         gen9_ctx_workarounds_init(engine);
455
456         /* WaToEnableHwFixForPushConstHWBug:glk */
457         WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
458                           GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
459 }
460
461 static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine)
462 {
463         struct i915_wa_list *wal = &engine->ctx_wa_list;
464
465         gen9_ctx_workarounds_init(engine);
466
467         /* WaToEnableHwFixForPushConstHWBug:cfl */
468         WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
469                           GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
470
471         /* WaDisableSbeCacheDispatchPortSharing:cfl */
472         WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
473                           GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
474 }
475
476 static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine)
477 {
478         struct drm_i915_private *i915 = engine->i915;
479         struct i915_wa_list *wal = &engine->ctx_wa_list;
480
481         /* WaForceContextSaveRestoreNonCoherent:cnl */
482         WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
483                           HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
484
485         /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
486         if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
487                 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
488
489         /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
490         WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
491                           GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
492
493         /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
494         if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
495                 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
496                                   GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
497
498         /* WaPushConstantDereferenceHoldDisable:cnl */
499         WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
500
501         /* FtrEnableFastAnisoL1BankingFix:cnl */
502         WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
503
504         /* WaDisable3DMidCmdPreemption:cnl */
505         WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
506
507         /* WaDisableGPGPUMidCmdPreemption:cnl */
508         WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
509                             GEN9_PREEMPT_GPGPU_LEVEL_MASK,
510                             GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
511
512         /* WaDisableEarlyEOT:cnl */
513         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
514 }
515
516 static void icl_ctx_workarounds_init(struct intel_engine_cs *engine)
517 {
518         struct drm_i915_private *i915 = engine->i915;
519         struct i915_wa_list *wal = &engine->ctx_wa_list;
520
521         /* Wa_1604370585:icl (pre-prod)
522          * Formerly known as WaPushConstantDereferenceHoldDisable
523          */
524         if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
525                 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
526                                   PUSH_CONSTANT_DEREF_DISABLE);
527
528         /* WaForceEnableNonCoherent:icl
529          * This is not the same workaround as in early Gen9 platforms, where
530          * lacking this could cause system hangs, but coherency performance
531          * overhead is high and only a few compute workloads really need it
532          * (the register is whitelisted in hardware now, so UMDs can opt in
533          * for coherency if they have a good reason).
534          */
535         WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
536
537         /* Wa_2006611047:icl (pre-prod)
538          * Formerly known as WaDisableImprovedTdlClkGating
539          */
540         if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
541                 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
542                                   GEN11_TDL_CLOCK_GATING_FIX_DISABLE);
543
544         /* WaEnableStateCacheRedirectToCS:icl */
545         WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN1,
546                           GEN11_STATE_CACHE_REDIRECT_TO_CS);
547
548         /* Wa_2006665173:icl (pre-prod) */
549         if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
550                 WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
551                                   GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
552
553         /* WaEnableFloatBlendOptimization:icl */
554         wa_write_masked_or(wal,
555                            GEN10_CACHE_MODE_SS,
556                            0, /* write-only, so skip validation */
557                            _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
558 }
559
560 void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
561 {
562         struct drm_i915_private *i915 = engine->i915;
563         struct i915_wa_list *wal = &engine->ctx_wa_list;
564
565         wa_init_start(wal, "context");
566
567         if (INTEL_GEN(i915) < 8)
568                 return;
569         else if (IS_BROADWELL(i915))
570                 bdw_ctx_workarounds_init(engine);
571         else if (IS_CHERRYVIEW(i915))
572                 chv_ctx_workarounds_init(engine);
573         else if (IS_SKYLAKE(i915))
574                 skl_ctx_workarounds_init(engine);
575         else if (IS_BROXTON(i915))
576                 bxt_ctx_workarounds_init(engine);
577         else if (IS_KABYLAKE(i915))
578                 kbl_ctx_workarounds_init(engine);
579         else if (IS_GEMINILAKE(i915))
580                 glk_ctx_workarounds_init(engine);
581         else if (IS_COFFEELAKE(i915))
582                 cfl_ctx_workarounds_init(engine);
583         else if (IS_CANNONLAKE(i915))
584                 cnl_ctx_workarounds_init(engine);
585         else if (IS_ICELAKE(i915))
586                 icl_ctx_workarounds_init(engine);
587         else
588                 MISSING_CASE(INTEL_GEN(i915));
589
590         wa_init_finish(wal);
591 }
592
593 int intel_engine_emit_ctx_wa(struct i915_request *rq)
594 {
595         struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
596         struct i915_wa *wa;
597         unsigned int i;
598         u32 *cs;
599         int ret;
600
601         if (wal->count == 0)
602                 return 0;
603
604         ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
605         if (ret)
606                 return ret;
607
608         cs = intel_ring_begin(rq, (wal->count * 2 + 2));
609         if (IS_ERR(cs))
610                 return PTR_ERR(cs);
611
612         *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
613         for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
614                 *cs++ = i915_mmio_reg_offset(wa->reg);
615                 *cs++ = wa->val;
616         }
617         *cs++ = MI_NOOP;
618
619         intel_ring_advance(rq, cs);
620
621         ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
622         if (ret)
623                 return ret;
624
625         return 0;
626 }
627
628 static void
629 gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
630 {
631         /* WaDisableKillLogic:bxt,skl,kbl */
632         if (!IS_COFFEELAKE(i915))
633                 wa_write_or(wal,
634                             GAM_ECOCHK,
635                             ECOCHK_DIS_TLB);
636
637         if (HAS_LLC(i915)) {
638                 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
639                  *
640                  * Must match Display Engine. See
641                  * WaCompressedResourceDisplayNewHashMode.
642                  */
643                 wa_write_or(wal,
644                             MMCD_MISC_CTRL,
645                             MMCD_PCLA | MMCD_HOTSPOT_EN);
646         }
647
648         /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
649         wa_write_or(wal,
650                     GAM_ECOCHK,
651                     BDW_DISABLE_HDC_INVALIDATION);
652 }
653
654 static void
655 skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
656 {
657         gen9_gt_workarounds_init(i915, wal);
658
659         /* WaDisableGafsUnitClkGating:skl */
660         wa_write_or(wal,
661                     GEN7_UCGCTL4,
662                     GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
663
664         /* WaInPlaceDecompressionHang:skl */
665         if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
666                 wa_write_or(wal,
667                             GEN9_GAMT_ECO_REG_RW_IA,
668                             GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
669 }
670
671 static void
672 bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
673 {
674         gen9_gt_workarounds_init(i915, wal);
675
676         /* WaInPlaceDecompressionHang:bxt */
677         wa_write_or(wal,
678                     GEN9_GAMT_ECO_REG_RW_IA,
679                     GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
680 }
681
682 static void
683 kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
684 {
685         gen9_gt_workarounds_init(i915, wal);
686
687         /* WaDisableDynamicCreditSharing:kbl */
688         if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
689                 wa_write_or(wal,
690                             GAMT_CHKN_BIT_REG,
691                             GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
692
693         /* WaDisableGafsUnitClkGating:kbl */
694         wa_write_or(wal,
695                     GEN7_UCGCTL4,
696                     GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
697
698         /* WaInPlaceDecompressionHang:kbl */
699         wa_write_or(wal,
700                     GEN9_GAMT_ECO_REG_RW_IA,
701                     GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
702 }
703
/* Geminilake GT workarounds: just the gen9 common set, nothing GLK-specific. */
static void
glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);
}
709
710 static void
711 cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
712 {
713         gen9_gt_workarounds_init(i915, wal);
714
715         /* WaDisableGafsUnitClkGating:cfl */
716         wa_write_or(wal,
717                     GEN7_UCGCTL4,
718                     GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
719
720         /* WaInPlaceDecompressionHang:cfl */
721         wa_write_or(wal,
722                     GEN9_GAMT_ECO_REG_RW_IA,
723                     GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
724 }
725
726 static void
727 wa_init_mcr(struct drm_i915_private *dev_priv, struct i915_wa_list *wal)
728 {
729         const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
730         u32 mcr_slice_subslice_mask;
731
732         /*
733          * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
734          * L3Banks could be fused off in single slice scenario. If that is
735          * the case, we might need to program MCR select to a valid L3Bank
736          * by default, to make sure we correctly read certain registers
737          * later on (in the range 0xB100 - 0xB3FF).
738          * This might be incompatible with
739          * WaProgramMgsrForCorrectSliceSpecificMmioReads.
740          * Fortunately, this should not happen in production hardware, so
741          * we only assert that this is the case (instead of implementing
742          * something more complex that requires checking the range of every
743          * MMIO read).
744          */
745         if (INTEL_GEN(dev_priv) >= 10 &&
746             is_power_of_2(sseu->slice_mask)) {
747                 /*
748                  * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
749                  * enabled subslice, no need to redirect MCR packet
750                  */
751                 u32 slice = fls(sseu->slice_mask);
752                 u32 fuse3 = I915_READ(GEN10_MIRROR_FUSE3);
753                 u8 ss_mask = sseu->subslice_mask[slice];
754
755                 u8 enabled_mask = (ss_mask | ss_mask >>
756                                    GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
757                 u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;
758
759                 /*
760                  * Production silicon should have matched L3Bank and
761                  * subslice enabled
762                  */
763                 WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
764         }
765
766         if (INTEL_GEN(dev_priv) >= 11)
767                 mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
768                                           GEN11_MCR_SUBSLICE_MASK;
769         else
770                 mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
771                                           GEN8_MCR_SUBSLICE_MASK;
772         /*
773          * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
774          * Before any MMIO read into slice/subslice specific registers, MCR
775          * packet control register needs to be programmed to point to any
776          * enabled s/ss pair. Otherwise, incorrect values will be returned.
777          * This means each subsequent MMIO read will be forwarded to an
778          * specific s/ss combination, but this is OK since these registers
779          * are consistent across s/ss in almost all cases. In the rare
780          * occasions, such as INSTDONE, where this value is dependent
781          * on s/ss combo, the read should be done with read_subslice_reg.
782          */
783         wa_write_masked_or(wal,
784                            GEN8_MCR_SELECTOR,
785                            mcr_slice_subslice_mask,
786                            intel_calculate_mcr_s_ss_select(dev_priv));
787 }
788
789 static void
790 cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
791 {
792         wa_init_mcr(i915, wal);
793
794         /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
795         if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
796                 wa_write_or(wal,
797                             GAMT_CHKN_BIT_REG,
798                             GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
799
800         /* WaInPlaceDecompressionHang:cnl */
801         wa_write_or(wal,
802                     GEN9_GAMT_ECO_REG_RW_IA,
803                     GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
804 }
805
806 static void
807 icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
808 {
809         wa_init_mcr(i915, wal);
810
811         /* WaInPlaceDecompressionHang:icl */
812         wa_write_or(wal,
813                     GEN9_GAMT_ECO_REG_RW_IA,
814                     GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
815
816         /* WaModifyGamTlbPartitioning:icl */
817         wa_write_masked_or(wal,
818                            GEN11_GACB_PERF_CTRL,
819                            GEN11_HASH_CTRL_MASK,
820                            GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
821
822         /* Wa_1405766107:icl
823          * Formerly known as WaCL2SFHalfMaxAlloc
824          */
825         wa_write_or(wal,
826                     GEN11_LSN_UNSLCVC,
827                     GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
828                     GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
829
830         /* Wa_220166154:icl
831          * Formerly known as WaDisCtxReload
832          */
833         wa_write_or(wal,
834                     GEN8_GAMW_ECO_DEV_RW_IA,
835                     GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
836
837         /* Wa_1405779004:icl (pre-prod) */
838         if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
839                 wa_write_or(wal,
840                             SLICE_UNIT_LEVEL_CLKGATE,
841                             MSCUNIT_CLKGATE_DIS);
842
843         /* Wa_1406680159:icl */
844         wa_write_or(wal,
845                     SUBSLICE_UNIT_LEVEL_CLKGATE,
846                     GWUNIT_CLKGATE_DIS);
847
848         /* Wa_1406838659:icl (pre-prod) */
849         if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
850                 wa_write_or(wal,
851                             INF_UNIT_LEVEL_CLKGATE,
852                             CGPSF_CLKGATE_DIS);
853
854         /* Wa_1406463099:icl
855          * Formerly known as WaGamTlbPendError
856          */
857         wa_write_or(wal,
858                     GAMT_CHKN_BIT_REG,
859                     GAMT_CHKN_DISABLE_L3_COH_PIPE);
860 }
861
/*
 * Dispatch to the per-platform GT workaround list builder.
 *
 * Nothing is added for anything older than gen8, nor for Broadwell or
 * Cherryview. A platform that matches none of the cases below is reported
 * through MISSING_CASE().
 */
static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	if (INTEL_GEN(i915) < 8 || IS_BROADWELL(i915) || IS_CHERRYVIEW(i915))
		return;

	if (IS_SKYLAKE(i915)) {
		skl_gt_workarounds_init(i915, wal);
	} else if (IS_BROXTON(i915)) {
		bxt_gt_workarounds_init(i915, wal);
	} else if (IS_KABYLAKE(i915)) {
		kbl_gt_workarounds_init(i915, wal);
	} else if (IS_GEMINILAKE(i915)) {
		glk_gt_workarounds_init(i915, wal);
	} else if (IS_COFFEELAKE(i915)) {
		cfl_gt_workarounds_init(i915, wal);
	} else if (IS_CANNONLAKE(i915)) {
		cnl_gt_workarounds_init(i915, wal);
	} else if (IS_ICELAKE(i915)) {
		icl_gt_workarounds_init(i915, wal);
	} else {
		MISSING_CASE(INTEL_GEN(i915));
	}
}
888
889 void intel_gt_init_workarounds(struct drm_i915_private *i915)
890 {
891         struct i915_wa_list *wal = &i915->gt_wa_list;
892
893         wa_init_start(wal, "GT");
894         gt_init_workarounds(i915, wal);
895         wa_init_finish(wal);
896 }
897
898 static enum forcewake_domains
899 wal_get_fw_for_rmw(struct drm_i915_private *dev_priv,
900                    const struct i915_wa_list *wal)
901 {
902         enum forcewake_domains fw = 0;
903         struct i915_wa *wa;
904         unsigned int i;
905
906         for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
907                 fw |= intel_uncore_forcewake_for_reg(dev_priv,
908                                                      wa->reg,
909                                                      FW_REG_READ |
910                                                      FW_REG_WRITE);
911
912         return fw;
913 }
914
915 static void
916 wa_list_apply(struct drm_i915_private *dev_priv, const struct i915_wa_list *wal)
917 {
918         enum forcewake_domains fw;
919         unsigned long flags;
920         struct i915_wa *wa;
921         unsigned int i;
922
923         if (!wal->count)
924                 return;
925
926         fw = wal_get_fw_for_rmw(dev_priv, wal);
927
928         spin_lock_irqsave(&dev_priv->uncore.lock, flags);
929         intel_uncore_forcewake_get__locked(dev_priv, fw);
930
931         for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
932                 u32 val = I915_READ_FW(wa->reg);
933
934                 val &= ~wa->mask;
935                 val |= wa->val;
936
937                 I915_WRITE_FW(wa->reg, val);
938         }
939
940         intel_uncore_forcewake_put__locked(dev_priv, fw);
941         spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
942 }
943
944 void intel_gt_apply_workarounds(struct drm_i915_private *dev_priv)
945 {
946         wa_list_apply(dev_priv, &dev_priv->gt_wa_list);
947 }
948
949 static bool
950 wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
951 {
952         if ((cur ^ wa->val) & wa->mask) {
953                 DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
954                           name, from, i915_mmio_reg_offset(wa->reg), cur,
955                           cur & wa->mask, wa->val, wa->mask);
956
957                 return false;
958         }
959
960         return true;
961 }
962
963 static bool wa_list_verify(struct drm_i915_private *dev_priv,
964                            const struct i915_wa_list *wal,
965                            const char *from)
966 {
967         struct i915_wa *wa;
968         unsigned int i;
969         bool ok = true;
970
971         for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
972                 ok &= wa_verify(wa, I915_READ(wa->reg), wal->name, from);
973
974         return ok;
975 }
976
977 bool intel_gt_verify_workarounds(struct drm_i915_private *dev_priv,
978                                  const char *from)
979 {
980         return wa_list_verify(dev_priv, &dev_priv->gt_wa_list, from);
981 }
982
983 static void
984 whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
985 {
986         struct i915_wa wa = {
987                 .reg = reg
988         };
989
990         if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
991                 return;
992
993         _wa_add(wal, &wa);
994 }
995
996 static void gen9_whitelist_build(struct i915_wa_list *w)
997 {
998         /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
999         whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
1000
1001         /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1002         whitelist_reg(w, GEN8_CS_CHICKEN1);
1003
1004         /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1005         whitelist_reg(w, GEN8_HDC_CHICKEN1);
1006 }
1007
1008 static void skl_whitelist_build(struct i915_wa_list *w)
1009 {
1010         gen9_whitelist_build(w);
1011
1012         /* WaDisableLSQCROPERFforOCL:skl */
1013         whitelist_reg(w, GEN8_L3SQCREG4);
1014 }
1015
/* Broxton whitelist: only the common gen9 entries. */
static void bxt_whitelist_build(struct i915_wa_list *w)
{
	gen9_whitelist_build(w);
}
1020
1021 static void kbl_whitelist_build(struct i915_wa_list *w)
1022 {
1023         gen9_whitelist_build(w);
1024
1025         /* WaDisableLSQCROPERFforOCL:kbl */
1026         whitelist_reg(w, GEN8_L3SQCREG4);
1027 }
1028
1029 static void glk_whitelist_build(struct i915_wa_list *w)
1030 {
1031         gen9_whitelist_build(w);
1032
1033         /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
1034         whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1035 }
1036
/* Coffeelake whitelist: only the common gen9 entries. */
static void cfl_whitelist_build(struct i915_wa_list *w)
{
	gen9_whitelist_build(w);
}
1041
1042 static void cnl_whitelist_build(struct i915_wa_list *w)
1043 {
1044         /* WaEnablePreemptionGranularityControlByUMD:cnl */
1045         whitelist_reg(w, GEN8_CS_CHICKEN1);
1046 }
1047
1048 static void icl_whitelist_build(struct i915_wa_list *w)
1049 {
1050         /* WaAllowUMDToModifyHalfSliceChicken7:icl */
1051         whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
1052
1053         /* WaAllowUMDToModifySamplerMode:icl */
1054         whitelist_reg(w, GEN10_SAMPLER_MODE);
1055 }
1056
1057 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
1058 {
1059         struct drm_i915_private *i915 = engine->i915;
1060         struct i915_wa_list *w = &engine->whitelist;
1061
1062         GEM_BUG_ON(engine->id != RCS);
1063
1064         wa_init_start(w, "whitelist");
1065
1066         if (INTEL_GEN(i915) < 8)
1067                 return;
1068         else if (IS_BROADWELL(i915))
1069                 return;
1070         else if (IS_CHERRYVIEW(i915))
1071                 return;
1072         else if (IS_SKYLAKE(i915))
1073                 skl_whitelist_build(w);
1074         else if (IS_BROXTON(i915))
1075                 bxt_whitelist_build(w);
1076         else if (IS_KABYLAKE(i915))
1077                 kbl_whitelist_build(w);
1078         else if (IS_GEMINILAKE(i915))
1079                 glk_whitelist_build(w);
1080         else if (IS_COFFEELAKE(i915))
1081                 cfl_whitelist_build(w);
1082         else if (IS_CANNONLAKE(i915))
1083                 cnl_whitelist_build(w);
1084         else if (IS_ICELAKE(i915))
1085                 icl_whitelist_build(w);
1086         else
1087                 MISSING_CASE(INTEL_GEN(i915));
1088
1089         wa_init_finish(w);
1090 }
1091
1092 void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
1093 {
1094         struct drm_i915_private *dev_priv = engine->i915;
1095         const struct i915_wa_list *wal = &engine->whitelist;
1096         const u32 base = engine->mmio_base;
1097         struct i915_wa *wa;
1098         unsigned int i;
1099
1100         if (!wal->count)
1101                 return;
1102
1103         for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1104                 I915_WRITE(RING_FORCE_TO_NONPRIV(base, i),
1105                            i915_mmio_reg_offset(wa->reg));
1106
1107         /* And clear the rest just in case of garbage */
1108         for (; i < RING_MAX_NONPRIV_SLOTS; i++)
1109                 I915_WRITE(RING_FORCE_TO_NONPRIV(base, i),
1110                            i915_mmio_reg_offset(RING_NOPID(base)));
1111 }
1112
1113 static void
1114 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1115 {
1116         struct drm_i915_private *i915 = engine->i915;
1117
1118         if (IS_ICELAKE(i915)) {
1119                 /* This is not an Wa. Enable for better image quality */
1120                 wa_masked_en(wal,
1121                              _3D_CHICKEN3,
1122                              _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
1123
1124                 /* WaPipelineFlushCoherentLines:icl */
1125                 wa_write_or(wal,
1126                             GEN8_L3SQCREG4,
1127                             GEN8_LQSC_FLUSH_COHERENT_LINES);
1128
1129                 /*
1130                  * Wa_1405543622:icl
1131                  * Formerly known as WaGAPZPriorityScheme
1132                  */
1133                 wa_write_or(wal,
1134                             GEN8_GARBCNTL,
1135                             GEN11_ARBITRATION_PRIO_ORDER_MASK);
1136
1137                 /*
1138                  * Wa_1604223664:icl
1139                  * Formerly known as WaL3BankAddressHashing
1140                  */
1141                 wa_write_masked_or(wal,
1142                                    GEN8_GARBCNTL,
1143                                    GEN11_HASH_CTRL_EXCL_MASK,
1144                                    GEN11_HASH_CTRL_EXCL_BIT0);
1145                 wa_write_masked_or(wal,
1146                                    GEN11_GLBLINVL,
1147                                    GEN11_BANK_HASH_ADDR_EXCL_MASK,
1148                                    GEN11_BANK_HASH_ADDR_EXCL_BIT0);
1149
1150                 /*
1151                  * Wa_1405733216:icl
1152                  * Formerly known as WaDisableCleanEvicts
1153                  */
1154                 wa_write_or(wal,
1155                             GEN8_L3SQCREG4,
1156                             GEN11_LQSC_CLEAN_EVICT_DISABLE);
1157
1158                 /* WaForwardProgressSoftReset:icl */
1159                 wa_write_or(wal,
1160                             GEN10_SCRATCH_LNCF2,
1161                             PMFLUSHDONE_LNICRSDROP |
1162                             PMFLUSH_GAPL3UNBLOCK |
1163                             PMFLUSHDONE_LNEBLK);
1164
1165                 /* Wa_1406609255:icl (pre-prod) */
1166                 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
1167                         wa_write_or(wal,
1168                                     GEN7_SARCHKMD,
1169                                     GEN7_DISABLE_DEMAND_PREFETCH |
1170                                     GEN7_DISABLE_SAMPLER_PREFETCH);
1171         }
1172
1173         if (IS_GEN(i915, 9) || IS_CANNONLAKE(i915)) {
1174                 /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,cnl */
1175                 wa_masked_en(wal,
1176                              GEN7_FF_SLICE_CS_CHICKEN1,
1177                              GEN9_FFSC_PERCTX_PREEMPT_CTRL);
1178         }
1179
1180         if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
1181                 /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
1182                 wa_write_or(wal,
1183                             GEN8_GARBCNTL,
1184                             GEN9_GAPS_TSV_CREDIT_DISABLE);
1185         }
1186
1187         if (IS_BROXTON(i915)) {
1188                 /* WaDisablePooledEuLoadBalancingFix:bxt */
1189                 wa_masked_en(wal,
1190                              FF_SLICE_CS_CHICKEN2,
1191                              GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
1192         }
1193
1194         if (IS_GEN(i915, 9)) {
1195                 /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
1196                 wa_masked_en(wal,
1197                              GEN9_CSFE_CHICKEN1_RCS,
1198                              GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
1199
1200                 /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
1201                 wa_write_or(wal,
1202                             BDW_SCRATCH1,
1203                             GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
1204
1205                 /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
1206                 if (IS_GEN9_LP(i915))
1207                         wa_write_masked_or(wal,
1208                                            GEN8_L3SQCREG1,
1209                                            L3_PRIO_CREDITS_MASK,
1210                                            L3_GENERAL_PRIO_CREDITS(62) |
1211                                            L3_HIGH_PRIO_CREDITS(2));
1212
1213                 /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
1214                 wa_write_or(wal,
1215                             GEN8_L3SQCREG4,
1216                             GEN8_LQSC_FLUSH_COHERENT_LINES);
1217         }
1218 }
1219
1220 static void
1221 xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1222 {
1223         struct drm_i915_private *i915 = engine->i915;
1224
1225         /* WaKBLVECSSemaphoreWaitPoll:kbl */
1226         if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
1227                 wa_write(wal,
1228                          RING_SEMA_WAIT_POLL(engine->mmio_base),
1229                          1);
1230         }
1231 }
1232
1233 static void
1234 engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1235 {
1236         if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
1237                 return;
1238
1239         if (engine->id == RCS)
1240                 rcs_engine_wa_init(engine, wal);
1241         else
1242                 xcs_engine_wa_init(engine, wal);
1243 }
1244
1245 void intel_engine_init_workarounds(struct intel_engine_cs *engine)
1246 {
1247         struct i915_wa_list *wal = &engine->wa_list;
1248
1249         if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8))
1250                 return;
1251
1252         wa_init_start(wal, engine->name);
1253         engine_init_workarounds(engine, wal);
1254         wa_init_finish(wal);
1255 }
1256
1257 void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
1258 {
1259         wa_list_apply(engine->i915, &engine->wa_list);
1260 }
1261
1262 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1263 #include "selftests/intel_workarounds.c"
1264 #endif