/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * This file is intended as a central place to implement most [1]_ of the
 * required workarounds for hardware to work as originally intended. They fall
 * into five basic categories depending on how/when they are applied:
 *
 * - Workarounds that touch registers that are saved/restored to/from the HW
 *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
 * - GT workarounds. The list of these WAs is applied whenever these registers
 *   revert to their default values (on GPU reset, suspend/resume [2]_, etc.).
 * - Display workarounds. The list is applied during display clock-gating
 *   initialization.
 * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of these to-be-whitelisted registers to some special HW
 *   registers).
 * - Workaround batchbuffers, that get executed automatically by the hardware
 *   on every HW context restore.
 *
 * .. [1] Note that there are other WAs that, due to their nature, cannot be
 *    applied from a central place. Those are peppered around the rest of the
 *    code, as needed.
 *
 * .. [2] Technically, some registers are power-context saved & restored, so
 *    they survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things. We can revisit this in the future.
 *
 * Layout
 * ''''''
 *
 * Keep things in this file ordered by WA type, as per the above (context, GT,
 * display, register whitelist, batchbuffer). Then, inside each type, keep the
 * following order:
 *
 * - Infrastructure functions and macros
 * - WAs per platform in standard gen/chrono order
 * - Public functions to init or apply the given workaround type.
 */

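/*
 * A minimal usage sketch (illustrative only; "xyz" and the register/bit
 * names below are placeholders, not real definitions): a per-platform hook
 * pushes masked writes onto the engine's context workaround list, which is
 * then emitted via MI_LOAD_REGISTER_IMM every time a new context is
 * created:
 *
 *      static void xyz_ctx_workarounds_init(struct intel_engine_cs *engine)
 *      {
 *              struct i915_wa_list *wal = &engine->ctx_wa_list;
 *
 *              WA_SET_BIT_MASKED(XYZ_CHICKEN_REG, XYZ_SOME_DISABLE_BIT);
 *      }
 */
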
static void wa_init_start(struct i915_wa_list *wal, const char *name)
{
        wal->name = name;
}

#define WA_LIST_CHUNK (1 << 4)

static void wa_init_finish(struct i915_wa_list *wal)
{
        /* Trim unused entries. */
        if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
                struct i915_wa *list = kmemdup(wal->list,
                                               wal->count * sizeof(*list),
                                               GFP_KERNEL);

                if (list) {
                        kfree(wal->list);
                        wal->list = list;
                }
        }

        if (!wal->count)
                return;

        DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
                         wal->wa_count, wal->name);
}

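/*
 * Note on sizing: _wa_add() below grows each list in WA_LIST_CHUNK-sized
 * (i.e. 16-entry) steps, and wa_init_finish() trims the allocation back to
 * the exact count with kmemdup(). For example, a platform adding three
 * workarounds briefly holds a 16-entry array that finish shrinks to three.
 */
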
static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
        unsigned int addr = i915_mmio_reg_offset(wa->reg);
        unsigned int start = 0, end = wal->count;
        const unsigned int grow = WA_LIST_CHUNK;
        struct i915_wa *wa_;

        GEM_BUG_ON(!is_power_of_2(grow));

        if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
                struct i915_wa *list;

                list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
                                     GFP_KERNEL);
                if (!list) {
                        DRM_ERROR("No space for workaround init!\n");
                        return;
                }

                if (wal->list) {
                        memcpy(list, wal->list, sizeof(*wa) * wal->count);
                        kfree(wal->list); /* don't leak the previous chunk */
                }

                wal->list = list;
        }

        while (start < end) {
                unsigned int mid = start + (end - start) / 2;

                if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
                        start = mid + 1;
                } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
                        end = mid;
                } else {
                        wa_ = &wal->list[mid];

                        if ((wa->mask & ~wa_->mask) == 0) {
                                DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
                                          i915_mmio_reg_offset(wa_->reg),
                                          wa_->mask, wa_->val);

                                wa_->val &= ~wa->mask;
                        }

                        wal->wa_count++;
                        wa_->val |= wa->val;
                        wa_->mask |= wa->mask;
                        return;
                }
        }

        wal->wa_count++;
        wa_ = &wal->list[wal->count++];
        *wa_ = *wa;

        while (wa_-- > wal->list) {
                GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
                           i915_mmio_reg_offset(wa_[1].reg));
                if (i915_mmio_reg_offset(wa_[1].reg) >
                    i915_mmio_reg_offset(wa_[0].reg))
                        break;

                swap(wa_[1], wa_[0]);
        }
}

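/*
 * Worked example of the merge path above (illustrative values): if the
 * list already holds { reg 0x7004, mask 0x0001, val 0x00010001 } and a
 * later workaround adds { reg 0x7004, mask 0x0002, val 0x00020002 }, the
 * binary search finds the existing entry and ORs the two together, so one
 * write of 0x00030003 carries both workarounds. Only when the new mask is
 * wholly contained in the old one is the previous value for those bits
 * discarded, with the DRM_ERROR above.
 */
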
static void
wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
                   u32 val)
{
        struct i915_wa wa = {
                .reg = reg,
                .mask = mask,
                .val = val
        };

        _wa_add(wal, &wa);
}

static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
}

static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_write_masked_or(wal, reg, ~0, val);
}

static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_write_masked_or(wal, reg, val, val);
}

#define WA_SET_BIT_MASKED(addr, mask) \
        wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
        wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
        wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))

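/*
 * Background on the _MASKED_* helpers used by these macros: masked
 * registers pair each control bit with a write-enable bit in the upper 16
 * bits of the dword, so the hardware only latches the bits named in the
 * mask. Assuming the i915_reg.h definition _MASKED_FIELD(mask, value) ==
 * ((mask) << 16 | (value)):
 *
 *      WA_SET_BIT_MASKED(reg, BIT(2))  stores 0x00040004 (set bit 2)
 *      WA_CLR_BIT_MASKED(reg, BIT(2))  stores 0x00040000 (clear bit 2)
 */
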
static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

        /* WaDisableAsyncFlipPerfMode:bdw,chv */
        WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

        /* WaDisablePartialInstShootdown:bdw,chv */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

        /* Use Force Non-Coherent whenever executing a 3D context. This is a
         * workaround for a possible hang in the unlikely event a TLB
         * invalidation occurs during a PSD flush.
         */
        /* WaForceEnableNonCoherent:bdw,chv */
        /* WaHdcDisableFetchWhenMasked:bdw,chv */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_DONOT_FETCH_MEM_WHEN_MASKED |
                          HDC_FORCE_NON_COHERENT);

        /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
         * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
         *  polygons in the same 8x4 pixel/sample area to be processed without
         *  stalling waiting for the earlier ones to write to Hierarchical Z
         *  buffer."
         *
         * This optimization is off by default for BDW and CHV; turn it on.
         */
        WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

        /* Wa4x4STCOptimizationDisable:bdw,chv */
        WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

        /*
         * BSpec recommends 8x4 when MSAA is used,
         * however in practice 16x4 seems fastest.
         *
         * Note that PS/WM thread counts depend on the WIZ hashing
         * disable bit, which we don't touch here, but it's good
         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
         */
        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
                            GEN6_WIZ_HASHING_MASK,
                            GEN6_WIZ_HASHING_16x4);
}

static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        gen8_ctx_workarounds_init(engine);

        /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

        /* WaDisableDopClockGating:bdw
         *
         * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
         * to disable EUTC clock gating.
         */
        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                          DOP_CLOCK_GATING_DISABLE);

        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                          GEN8_SAMPLER_POWER_BYPASS_DIS);

        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          /* WaForceContextSaveRestoreNonCoherent:bdw */
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
                          /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
                          (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}

static void chv_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        gen8_ctx_workarounds_init(engine);

        /* WaDisableThreadStallDopClockGating:chv */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

        /* Improve HiZ throughput on CHV. */
        WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
}

static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        if (HAS_LLC(i915)) {
                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
                 *
                 * Must match Display Engine. See
                 * WaCompressedResourceDisplayNewHashMode.
                 */
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN9_PBE_COMPRESSED_HASH_SELECTION);
                WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
                                  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
        }

        /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          FLOW_CONTROL_ENABLE |
                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

        /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
        if (!IS_COFFEELAKE(i915))
                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                                  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);

        /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
        /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
                          GEN9_ENABLE_YV12_BUGFIX |
                          GEN9_ENABLE_GPGPU_PREEMPTION);

        /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(CACHE_MODE_1,
                          GEN8_4x4_STC_OPTIMIZATION_DISABLE |
                          GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

        /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
        WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
                          GEN9_CCS_TLB_PREFETCH_ENABLE);

        /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
                          HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

        /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
         * both tied to WaForceContextSaveRestoreNonCoherent
         * in some hsds for skl. We keep the tie for all gen9. The
         * documentation is a bit hazy and so we want to get common behaviour,
         * even though there is no clear evidence we would need both on kbl/bxt.
         * This area has been a source of system hangs, so we play it safe
         * and mimic the skl regardless of what bspec says.
         *
         * Use Force Non-Coherent whenever executing a 3D context. This
         * is a workaround for a possible hang in the unlikely event
         * a TLB invalidation occurs during a PSD flush.
         */

        /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_FORCE_NON_COHERENT);

        /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
        if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                                  GEN8_SAMPLER_POWER_BYPASS_DIS);

        /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

        /*
         * Supporting preemption with fine-granularity requires changes in the
         * batch buffer programming. Since we can't break old userspace, we
         * need to set our default preemption level to a safe value. Userspace
         * is still able to use more fine-grained preemption levels, since in
         * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
         * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
         * not real HW workarounds, but merely a way to start using preemption
         * while maintaining the old contract with userspace.
         */

        /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

        /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

        /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
        if (IS_GEN9_LP(i915))
                WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
}

static void skl_tune_iz_hashing(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *wal = &engine->ctx_wa_list;
        u8 vals[3] = { 0, 0, 0 };
        unsigned int i;

        for (i = 0; i < 3; i++) {
                u8 ss;

                /*
                 * Only consider slices where one, and only one, subslice has 7
                 * EUs
                 */
                if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
                        continue;

                /*
                 * subslice_7eu[i] != 0 (because of the check above) and
                 * ss_max == 4 (maximum number of subslices possible per slice)
                 *
                 * ->    0 <= ss <= 3;
                 */
                ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
                vals[i] = 3 - ss;
        }

        if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
                return;

        /* Tune IZ hashing. See intel_device_info_runtime_init() */
        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
                            GEN9_IZ_HASHING_MASK(2) |
                            GEN9_IZ_HASHING_MASK(1) |
                            GEN9_IZ_HASHING_MASK(0),
                            GEN9_IZ_HASHING(2, vals[2]) |
                            GEN9_IZ_HASHING(1, vals[1]) |
                            GEN9_IZ_HASHING(0, vals[0]));
}

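/*
 * Worked example (illustrative): if slice 0 reports subslice_7eu[0] ==
 * BIT(2), exactly one subslice (number 2) has seven EUs, so ss = 2 and
 * vals[0] = 3 - 2 = 1, which GEN9_IZ_HASHING(0, 1) then programs into
 * GEN7_GT_MODE. Slices whose mask has zero or several bits set are left
 * at vals[i] = 0.
 */
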
static void skl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        gen9_ctx_workarounds_init(engine);
        skl_tune_iz_hashing(engine);
}

static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        gen9_ctx_workarounds_init(engine);

        /* WaDisableThreadStallDopClockGating:bxt */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          STALL_DOP_GATING_DISABLE);

        /* WaToEnableHwFixForPushConstHWBug:bxt */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        gen9_ctx_workarounds_init(engine);

        /* WaToEnableHwFixForPushConstHWBug:kbl */
        if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableSbeCacheDispatchPortSharing:kbl */
        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void glk_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        gen9_ctx_workarounds_init(engine);

        /* WaToEnableHwFixForPushConstHWBug:glk */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        gen9_ctx_workarounds_init(engine);

        /* WaToEnableHwFixForPushConstHWBug:cfl */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableSbeCacheDispatchPortSharing:cfl */
        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        /* WaForceContextSaveRestoreNonCoherent:cnl */
        WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);

        /* WaThrottleEUPerfToAvoidTDBackPressure:cnl (pre-prod) */
        if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
                WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);

        /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
        if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);

        /* WaPushConstantDereferenceHoldDisable:cnl */
        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);

        /* FtrEnableFastAnisoL1BankingFix:cnl */
        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);

        /* WaDisable3DMidCmdPreemption:cnl */
        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

        /* WaDisableGPGPUMidCmdPreemption:cnl */
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

        /* WaDisableEarlyEOT:cnl */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
}

static void icl_ctx_workarounds_init(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        /* Wa_1604370585:icl (pre-prod)
         * Formerly known as WaPushConstantDereferenceHoldDisable
         */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                                  PUSH_CONSTANT_DEREF_DISABLE);

        /* WaForceEnableNonCoherent:icl
         * This is not the same workaround as in early Gen9 platforms, where
         * lacking this could cause system hangs, but coherency performance
         * overhead is high and only a few compute workloads really need it
         * (the register is whitelisted in hardware now, so UMDs can opt in
         * for coherency if they have a good reason).
         */
        WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

        /* Wa_2006611047:icl (pre-prod)
         * Formerly known as WaDisableImprovedTdlClkGating
         */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                                  GEN11_TDL_CLOCK_GATING_FIX_DISABLE);

        /* WaEnableStateCacheRedirectToCS:icl */
        WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN1,
                          GEN11_STATE_CACHE_REDIRECT_TO_CS);

        /* Wa_2006665173:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
                                  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
}

void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *wal = &engine->ctx_wa_list;

        wa_init_start(wal, "context");

        if (INTEL_GEN(i915) < 8)
                return;
        else if (IS_BROADWELL(i915))
                bdw_ctx_workarounds_init(engine);
        else if (IS_CHERRYVIEW(i915))
                chv_ctx_workarounds_init(engine);
        else if (IS_SKYLAKE(i915))
                skl_ctx_workarounds_init(engine);
        else if (IS_BROXTON(i915))
                bxt_ctx_workarounds_init(engine);
        else if (IS_KABYLAKE(i915))
                kbl_ctx_workarounds_init(engine);
        else if (IS_GEMINILAKE(i915))
                glk_ctx_workarounds_init(engine);
        else if (IS_COFFEELAKE(i915))
                cfl_ctx_workarounds_init(engine);
        else if (IS_CANNONLAKE(i915))
                cnl_ctx_workarounds_init(engine);
        else if (IS_ICELAKE(i915))
                icl_ctx_workarounds_init(engine);
        else
                MISSING_CASE(INTEL_GEN(i915));

        wa_init_finish(wal);
}

int intel_engine_emit_ctx_wa(struct i915_request *rq)
{
        struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
        struct i915_wa *wa;
        unsigned int i;
        u32 *cs;
        int ret;

        if (wal->count == 0)
                return 0;

        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
        if (ret)
                return ret;

        cs = intel_ring_begin(rq, (wal->count * 2 + 2));
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                *cs++ = i915_mmio_reg_offset(wa->reg);
                *cs++ = wa->val;
        }
        *cs++ = MI_NOOP;

        intel_ring_advance(rq, cs);

        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
        if (ret)
                return ret;

        return 0;
}

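/*
 * Ring contents emitted above for a two-entry list (illustrative):
 *
 *      MI_LOAD_REGISTER_IMM(2)
 *      <offset of reg A> <value A>
 *      <offset of reg B> <value B>
 *      MI_NOOP
 *
 * which is exactly the count * 2 + 2 dwords reserved via intel_ring_begin(),
 * the trailing MI_NOOP padding the packet to an even dword count.
 */
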
static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        /* WaDisableKillLogic:bxt,skl,kbl */
        if (!IS_COFFEELAKE(i915))
                wa_write_or(wal,
                            GAM_ECOCHK,
                            ECOCHK_DIS_TLB);

        if (HAS_LLC(i915)) {
                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
                 *
                 * Must match Display Engine. See
                 * WaCompressedResourceDisplayNewHashMode.
                 */
                wa_write_or(wal,
                            MMCD_MISC_CTRL,
                            MMCD_PCLA | MMCD_HOTSPOT_EN);
        }

        /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
        wa_write_or(wal,
                    GAM_ECOCHK,
                    BDW_DISABLE_HDC_INVALIDATION);
}

static void
skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableGafsUnitClkGating:skl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:skl */
        if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
                wa_write_or(wal,
                            GEN9_GAMT_ECO_REG_RW_IA,
                            GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaInPlaceDecompressionHang:bxt */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableDynamicCreditSharing:kbl */
        if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
                wa_write_or(wal,
                            GAMT_CHKN_BIT_REG,
                            GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

        /* WaDisableGafsUnitClkGating:kbl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:kbl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);
}

static void
cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableGafsUnitClkGating:cfl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:cfl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
wa_init_mcr(struct drm_i915_private *dev_priv, struct i915_wa_list *wal)
{
        const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
        u32 mcr_slice_subslice_mask;

        /*
         * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
         * L3Banks could be fused off in single slice scenario. If that is
         * the case, we might need to program MCR select to a valid L3Bank
         * by default, to make sure we correctly read certain registers
         * later on (in the range 0xB100 - 0xB3FF).
         * This might be incompatible with
         * WaProgramMgsrForCorrectSliceSpecificMmioReads.
         * Fortunately, this should not happen in production hardware, so
         * we only assert that this is the case (instead of implementing
         * something more complex that requires checking the range of every
         * MMIO read).
         */
        if (INTEL_GEN(dev_priv) >= 10 &&
            is_power_of_2(sseu->slice_mask)) {
                /*
                 * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
                 * enabled subslice, no need to redirect MCR packet
                 */
                u32 slice = fls(sseu->slice_mask);
                u32 fuse3 = I915_READ(GEN10_MIRROR_FUSE3);
                u8 ss_mask = sseu->subslice_mask[slice];

                u8 enabled_mask = (ss_mask | ss_mask >>
                                   GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
                u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;

                /*
                 * Production silicon should have matching L3 bank and
                 * subslice enables.
                 */
                WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
        }

        if (INTEL_GEN(dev_priv) >= 11)
                mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
                                          GEN11_MCR_SUBSLICE_MASK;
        else
                mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
                                          GEN8_MCR_SUBSLICE_MASK;
        /*
         * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
         * Before any MMIO read into slice/subslice specific registers, MCR
         * packet control register needs to be programmed to point to any
         * enabled s/ss pair. Otherwise, incorrect values will be returned.
         * This means each subsequent MMIO read will be forwarded to a
         * specific s/ss combination, but this is OK since these registers
         * are consistent across s/ss in almost all cases. On the rare
         * occasions, such as INSTDONE, where the value depends on the s/ss
         * combo, the read should be done with read_subslice_reg.
         */
        wa_write_masked_or(wal,
                           GEN8_MCR_SELECTOR,
                           mcr_slice_subslice_mask,
                           intel_calculate_mcr_s_ss_select(dev_priv));
}

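/*
 * Worked example of the fold above (illustrative values, assuming
 * GEN10_L3BANK_PAIR_COUNT == 4 and GEN10_L3BANK_MASK == 0xf): a subslice
 * mask of 0x30 (subslices 4 and 5) gives enabled_mask == (0x30 | 0x03) &
 * 0xf == 0x03, i.e. both subslices fold onto L3 banks 0 and 1, which the
 * WARN then cross-checks against the corresponding FUSE3 bits.
 */
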
static void
cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        wa_init_mcr(i915, wal);

        /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
        if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
                wa_write_or(wal,
                            GAMT_CHKN_BIT_REG,
                            GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);

        /* WaInPlaceDecompressionHang:cnl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        wa_init_mcr(i915, wal);

        /* WaInPlaceDecompressionHang:icl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

        /* WaModifyGamTlbPartitioning:icl */
        wa_write_masked_or(wal,
                           GEN11_GACB_PERF_CTRL,
                           GEN11_HASH_CTRL_MASK,
                           GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

        /* Wa_1405766107:icl
         * Formerly known as WaCL2SFHalfMaxAlloc
         */
        wa_write_or(wal,
                    GEN11_LSN_UNSLCVC,
                    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
                    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

        /* Wa_220166154:icl
         * Formerly known as WaDisCtxReload
         */
        wa_write_or(wal,
                    GEN8_GAMW_ECO_DEV_RW_IA,
                    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

        /* Wa_1405779004:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                wa_write_or(wal,
                            SLICE_UNIT_LEVEL_CLKGATE,
                            MSCUNIT_CLKGATE_DIS);

        /* Wa_1406680159:icl */
        wa_write_or(wal,
                    SUBSLICE_UNIT_LEVEL_CLKGATE,
                    GWUNIT_CLKGATE_DIS);

        /* Wa_1406838659:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                wa_write_or(wal,
                            INF_UNIT_LEVEL_CLKGATE,
                            CGPSF_CLKGATE_DIS);

        /* Wa_1406463099:icl
         * Formerly known as WaGamTlbPendError
         */
        wa_write_or(wal,
                    GAMT_CHKN_BIT_REG,
                    GAMT_CHKN_DISABLE_L3_COH_PIPE);
}

static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        if (INTEL_GEN(i915) < 8)
                return;
        else if (IS_BROADWELL(i915))
                return;
        else if (IS_CHERRYVIEW(i915))
                return;
        else if (IS_SKYLAKE(i915))
                skl_gt_workarounds_init(i915, wal);
        else if (IS_BROXTON(i915))
                bxt_gt_workarounds_init(i915, wal);
        else if (IS_KABYLAKE(i915))
                kbl_gt_workarounds_init(i915, wal);
        else if (IS_GEMINILAKE(i915))
                glk_gt_workarounds_init(i915, wal);
        else if (IS_COFFEELAKE(i915))
                cfl_gt_workarounds_init(i915, wal);
        else if (IS_CANNONLAKE(i915))
                cnl_gt_workarounds_init(i915, wal);
        else if (IS_ICELAKE(i915))
                icl_gt_workarounds_init(i915, wal);
        else
                MISSING_CASE(INTEL_GEN(i915));
}

void intel_gt_init_workarounds(struct drm_i915_private *i915)
{
        struct i915_wa_list *wal = &i915->gt_wa_list;

        wa_init_start(wal, "GT");
        gt_init_workarounds(i915, wal);
        wa_init_finish(wal);
}

static enum forcewake_domains
wal_get_fw_for_rmw(struct drm_i915_private *dev_priv,
                   const struct i915_wa_list *wal)
{
        enum forcewake_domains fw = 0;
        struct i915_wa *wa;
        unsigned int i;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                fw |= intel_uncore_forcewake_for_reg(dev_priv,
                                                     wa->reg,
                                                     FW_REG_READ |
                                                     FW_REG_WRITE);

        return fw;
}

static void
wa_list_apply(struct drm_i915_private *dev_priv, const struct i915_wa_list *wal)
{
        enum forcewake_domains fw;
        unsigned long flags;
        struct i915_wa *wa;
        unsigned int i;

        if (!wal->count)
                return;

        fw = wal_get_fw_for_rmw(dev_priv, wal);

        spin_lock_irqsave(&dev_priv->uncore.lock, flags);
        intel_uncore_forcewake_get__locked(dev_priv, fw);

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                u32 val = I915_READ_FW(wa->reg);

                val &= ~wa->mask;
                val |= wa->val;

                I915_WRITE_FW(wa->reg, val);
        }

        intel_uncore_forcewake_put__locked(dev_priv, fw);
        spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
}

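/*
 * Note on the apply path above: the forcewake domains for every register
 * in the list are collected first so that the whole list can be
 * read-modify-written under a single forcewake grab and one uncore
 * spinlock hold, instead of toggling forcewake for each register.
 */
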
void intel_gt_apply_workarounds(struct drm_i915_private *dev_priv)
{
        wa_list_apply(dev_priv, &dev_priv->gt_wa_list);
}

static bool
wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
{
        if ((cur ^ wa->val) & wa->mask) {
                DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
                          name, from, i915_mmio_reg_offset(wa->reg), cur,
                          cur & wa->mask, wa->val, wa->mask);

                return false;
        }

        return true;
}

static bool wa_list_verify(struct drm_i915_private *dev_priv,
                           const struct i915_wa_list *wal,
                           const char *from)
{
        struct i915_wa *wa;
        unsigned int i;
        bool ok = true;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                ok &= wa_verify(wa, I915_READ(wa->reg), wal->name, from);

        return ok;
}

bool intel_gt_verify_workarounds(struct drm_i915_private *dev_priv,
                                 const char *from)
{
        return wa_list_verify(dev_priv, &dev_priv->gt_wa_list, from);
}

static void
whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
{
        struct i915_wa wa = {
                .reg = reg
        };

        if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
                return;

        _wa_add(wal, &wa);
}

static void gen9_whitelist_build(struct i915_wa_list *w)
{
        /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
        whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

        /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
        whitelist_reg(w, GEN8_CS_CHICKEN1);

        /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
        whitelist_reg(w, GEN8_HDC_CHICKEN1);
}

static void skl_whitelist_build(struct i915_wa_list *w)
{
        gen9_whitelist_build(w);

        /* WaDisableLSQCROPERFforOCL:skl */
        whitelist_reg(w, GEN8_L3SQCREG4);
}

static void bxt_whitelist_build(struct i915_wa_list *w)
{
        gen9_whitelist_build(w);
}

static void kbl_whitelist_build(struct i915_wa_list *w)
{
        gen9_whitelist_build(w);

        /* WaDisableLSQCROPERFforOCL:kbl */
        whitelist_reg(w, GEN8_L3SQCREG4);
}

static void glk_whitelist_build(struct i915_wa_list *w)
{
        gen9_whitelist_build(w);

        /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
        whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

static void cfl_whitelist_build(struct i915_wa_list *w)
{
        gen9_whitelist_build(w);
}

static void cnl_whitelist_build(struct i915_wa_list *w)
{
        /* WaEnablePreemptionGranularityControlByUMD:cnl */
        whitelist_reg(w, GEN8_CS_CHICKEN1);
}

static void icl_whitelist_build(struct i915_wa_list *w)
{
        /* WaAllowUMDToModifyHalfSliceChicken7:icl */
        whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);

        /* WaAllowUMDToModifySamplerMode:icl */
        whitelist_reg(w, GEN10_SAMPLER_MODE);
}

void intel_engine_init_whitelist(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *w = &engine->whitelist;

        GEM_BUG_ON(engine->id != RCS);

        wa_init_start(w, "whitelist");

        if (INTEL_GEN(i915) < 8)
                return;
        else if (IS_BROADWELL(i915))
                return;
        else if (IS_CHERRYVIEW(i915))
                return;
        else if (IS_SKYLAKE(i915))
                skl_whitelist_build(w);
        else if (IS_BROXTON(i915))
                bxt_whitelist_build(w);
        else if (IS_KABYLAKE(i915))
                kbl_whitelist_build(w);
        else if (IS_GEMINILAKE(i915))
                glk_whitelist_build(w);
        else if (IS_COFFEELAKE(i915))
                cfl_whitelist_build(w);
        else if (IS_CANNONLAKE(i915))
                cnl_whitelist_build(w);
        else if (IS_ICELAKE(i915))
                icl_whitelist_build(w);
        else
                MISSING_CASE(INTEL_GEN(i915));

        wa_init_finish(w);
}

void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
{
        struct drm_i915_private *dev_priv = engine->i915;
        const struct i915_wa_list *wal = &engine->whitelist;
        const u32 base = engine->mmio_base;
        struct i915_wa *wa;
        unsigned int i;

        if (!wal->count)
                return;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                I915_WRITE(RING_FORCE_TO_NONPRIV(base, i),
                           i915_mmio_reg_offset(wa->reg));

        /* And clear the rest just in case of garbage */
        for (; i < RING_MAX_NONPRIV_SLOTS; i++)
                I915_WRITE(RING_FORCE_TO_NONPRIV(base, i),
                           i915_mmio_reg_offset(RING_NOPID(base)));
}

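/*
 * Note on the whitelist mechanism above: each RING_FORCE_TO_NONPRIV slot
 * holds the offset of one register that non-privileged batches are then
 * allowed to access directly; unused slots are pointed at the harmless
 * RING_NOPID register so that no stale offsets remain whitelisted.
 */
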
static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        if (IS_ICELAKE(i915)) {
                /* This is not a Wa. Enable for better image quality */
                wa_masked_en(wal,
                             _3D_CHICKEN3,
                             _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);

                /* WaPipelineFlushCoherentLines:icl */
                wa_write_or(wal,
                            GEN8_L3SQCREG4,
                            GEN8_LQSC_FLUSH_COHERENT_LINES);

                /*
                 * Wa_1405543622:icl
                 * Formerly known as WaGAPZPriorityScheme
                 */
                wa_write_or(wal,
                            GEN8_GARBCNTL,
                            GEN11_ARBITRATION_PRIO_ORDER_MASK);

                /*
                 * Wa_1604223664:icl
                 * Formerly known as WaL3BankAddressHashing
                 */
                wa_write_masked_or(wal,
                                   GEN8_GARBCNTL,
                                   GEN11_HASH_CTRL_EXCL_MASK,
                                   GEN11_HASH_CTRL_EXCL_BIT0);
                wa_write_masked_or(wal,
                                   GEN11_GLBLINVL,
                                   GEN11_BANK_HASH_ADDR_EXCL_MASK,
                                   GEN11_BANK_HASH_ADDR_EXCL_BIT0);

                /*
                 * Wa_1405733216:icl
                 * Formerly known as WaDisableCleanEvicts
                 */
                wa_write_or(wal,
                            GEN8_L3SQCREG4,
                            GEN11_LQSC_CLEAN_EVICT_DISABLE);

                /* WaForwardProgressSoftReset:icl */
                wa_write_or(wal,
                            GEN10_SCRATCH_LNCF2,
                            PMFLUSHDONE_LNICRSDROP |
                            PMFLUSH_GAPL3UNBLOCK |
                            PMFLUSHDONE_LNEBLK);

                /* Wa_1406609255:icl (pre-prod) */
                if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                        wa_write_or(wal,
                                    GEN7_SARCHKMD,
                                    GEN7_DISABLE_DEMAND_PREFETCH |
                                    GEN7_DISABLE_SAMPLER_PREFETCH);
        }

        if (IS_GEN(i915, 9) || IS_CANNONLAKE(i915)) {
                /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,cnl */
                wa_masked_en(wal,
                             GEN7_FF_SLICE_CS_CHICKEN1,
                             GEN9_FFSC_PERCTX_PREEMPT_CTRL);
        }

        if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
                /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
                wa_write_or(wal,
                            GEN8_GARBCNTL,
                            GEN9_GAPS_TSV_CREDIT_DISABLE);
        }

        if (IS_BROXTON(i915)) {
                /* WaDisablePooledEuLoadBalancingFix:bxt */
                wa_masked_en(wal,
                             FF_SLICE_CS_CHICKEN2,
                             GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
        }

        if (IS_GEN(i915, 9)) {
                /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
                wa_masked_en(wal,
                             GEN9_CSFE_CHICKEN1_RCS,
                             GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);

                /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
                wa_write_or(wal,
                            BDW_SCRATCH1,
                            GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

                /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
                if (IS_GEN9_LP(i915))
                        wa_write_masked_or(wal,
                                           GEN8_L3SQCREG1,
                                           L3_PRIO_CREDITS_MASK,
                                           L3_GENERAL_PRIO_CREDITS(62) |
                                           L3_HIGH_PRIO_CREDITS(2));

                /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
                wa_write_or(wal,
                            GEN8_L3SQCREG4,
                            GEN8_LQSC_FLUSH_COHERENT_LINES);
        }
}

static void
xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        /* WaKBLVECSSemaphoreWaitPoll:kbl */
        if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
                wa_write(wal,
                         RING_SEMA_WAIT_POLL(engine->mmio_base),
                         1);
        }
}

static void
engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
        if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
                return;

        if (engine->id == RCS)
                rcs_engine_wa_init(engine, wal);
        else
                xcs_engine_wa_init(engine, wal);
}

void intel_engine_init_workarounds(struct intel_engine_cs *engine)
{
        struct i915_wa_list *wal = &engine->wa_list;

        if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8))
                return;

        wa_init_start(wal, engine->name);
        engine_init_workarounds(engine, wal);
        wa_init_finish(wal);
}

void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
{
        wa_list_apply(engine->i915, &engine->wa_list);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/intel_workarounds.c"
#endif