/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_context.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * This file is intended as a central place to implement most [1]_ of the
 * required workarounds for hardware to work as originally intended. They fall
 * into five basic categories depending on how/when they are applied:
 *
 * - Workarounds that touch registers that are saved/restored to/from the HW
 *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
 * - GT workarounds. The list of these WAs is applied whenever these registers
 *   revert to their default values (on GPU reset, suspend/resume [2]_, etc.).
 * - Display workarounds. The list is applied during display clock-gating
 *   initialization.
 * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of to-be-whitelisted registers to some special HW
 *   registers).
 * - Workaround batchbuffers, which get executed automatically by the hardware
 *   on every HW context restore.
 *
 * .. [1] Please notice that there are other WAs that, due to their nature,
 *    cannot be applied from a central place. Those are peppered around the rest
 *    of the code, as needed.
 *
 * .. [2] Technically, some registers are power-context saved & restored, so
 *    they survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things. We can revisit this in the future.
 *
 * Layout
 * ~~~~~~
 *
 * Keep things in this file ordered by WA type, as per the above (context, GT,
 * display, register whitelist, batchbuffer). Then, inside each type, keep the
 * following order:
 *
 * - Infrastructure functions and macros
 * - WAs per platform in standard gen/chrono order
 * - Public functions to init or apply the given workaround type.
 */
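
/*
 * Flow, as implemented below: a list is opened with wa_init_start(),
 * populated through the wa_*() helpers and trimmed by wa_init_finish().
 * GT and engine lists are later replayed onto the hardware via
 * wa_list_apply(), while context lists are emitted as a Load Register
 * Immediate sequence by intel_engine_emit_ctx_wa().
 */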

static void wa_init_start(struct i915_wa_list *wal, const char *name)
{
        wal->name = name;
}

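/*
 * Lists grow and are trimmed in chunks of WA_LIST_CHUNK entries; the
 * allocation logic in _wa_add() relies on this being a power of two.
 */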
#define WA_LIST_CHUNK (1 << 4)

static void wa_init_finish(struct i915_wa_list *wal)
{
        /* Trim unused entries. */
        if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
                struct i915_wa *list = kmemdup(wal->list,
                                               wal->count * sizeof(*list),
                                               GFP_KERNEL);

                if (list) {
                        kfree(wal->list);
                        wal->list = list;
                }
        }

        if (!wal->count)
                return;

        DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
                         wal->wa_count, wal->name);
}

static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
        unsigned int addr = i915_mmio_reg_offset(wa->reg);
        unsigned int start = 0, end = wal->count;
        const unsigned int grow = WA_LIST_CHUNK;
        struct i915_wa *wa_;

        GEM_BUG_ON(!is_power_of_2(grow));

        if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
                struct i915_wa *list;

                list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
                                     GFP_KERNEL);
                if (!list) {
                        DRM_ERROR("No space for workaround init!\n");
                        return;
                }

                if (wal->list)
                        memcpy(list, wal->list, sizeof(*wa) * wal->count);

                wal->list = list;
        }

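        /*
         * Binary-search for an existing entry on the same register: the
         * list is kept sorted by mmio offset, and repeated workarounds
         * for one register are merged into a single entry.
         */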
        while (start < end) {
                unsigned int mid = start + (end - start) / 2;

                if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
                        start = mid + 1;
                } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
                        end = mid;
                } else {
                        wa_ = &wal->list[mid];

                        if ((wa->mask & ~wa_->mask) == 0) {
                                DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
                                          i915_mmio_reg_offset(wa_->reg),
                                          wa_->mask, wa_->val);

                                wa_->val &= ~wa->mask;
                        }

                        wal->wa_count++;
                        wa_->val |= wa->val;
                        wa_->mask |= wa->mask;
                        wa_->read |= wa->read;
                        return;
                }
        }

        wal->wa_count++;
        wa_ = &wal->list[wal->count++];
        *wa_ = *wa;

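        /* Bubble the fresh entry backwards into its sorted position. */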
        while (wa_-- > wal->list) {
                GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
                           i915_mmio_reg_offset(wa_[1].reg));
                if (i915_mmio_reg_offset(wa_[1].reg) >
                    i915_mmio_reg_offset(wa_[0].reg))
                        break;

                swap(wa_[1], wa_[0]);
        }
}

static void
wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
                   u32 val)
{
        struct i915_wa wa = {
                .reg  = reg,
                .mask = mask,
                .val  = val,
                .read = mask,
        };

        _wa_add(wal, &wa);
}

static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
}

static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_write_masked_or(wal, reg, ~0, val);
}

static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
        wa_write_masked_or(wal, reg, val, val);
}

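/*
 * Like wa_write_masked_or(), but ->read is left zero so that wa_verify()
 * skips the register; for registers whose readback does not reflect what
 * was written.
 */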
static void
ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)
{
        struct i915_wa wa = {
                .reg  = reg,
                .mask = mask,
                .val  = val,
                /* Bonkers HW, skip verifying */
        };

        _wa_add(wal, &wa);
}

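/*
 * The WA_*_MASKED() macros target "masked" registers, where the upper 16
 * bits of a write select which of the lower 16 bits take effect; the
 * _MASKED_*() helpers construct such (mask << 16 | value) dwords.
 */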
#define WA_SET_BIT_MASKED(addr, mask) \
        wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
        wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
        wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))

static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
                                      struct i915_wa_list *wal)
{
        WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

        /* WaDisableAsyncFlipPerfMode:bdw,chv */
        WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

        /* WaDisablePartialInstShootdown:bdw,chv */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

        /* Use Force Non-Coherent whenever executing a 3D context. This is a
         * workaround for a possible hang in the unlikely event a TLB
         * invalidation occurs during a PSD flush.
         */
        /* WaForceEnableNonCoherent:bdw,chv */
        /* WaHdcDisableFetchWhenMasked:bdw,chv */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_DONOT_FETCH_MEM_WHEN_MASKED |
                          HDC_FORCE_NON_COHERENT);

        /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
         * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
         *  polygons in the same 8x4 pixel/sample area to be processed without
         *  stalling waiting for the earlier ones to write to Hierarchical Z
         *  buffer."
         *
         * This optimization is off by default for BDW and CHV; turn it on.
         */
        WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

        /* Wa4x4STCOptimizationDisable:bdw,chv */
        WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

        /*
         * BSpec recommends 8x4 when MSAA is used,
         * however in practice 16x4 seems fastest.
         *
         * Note that PS/WM thread counts depend on the WIZ hashing
         * disable bit, which we don't touch here, but it's good
         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
         */
        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
                            GEN6_WIZ_HASHING_MASK,
                            GEN6_WIZ_HASHING_16x4);
}

static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        gen8_ctx_workarounds_init(engine, wal);

        /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

        /* WaDisableDopClockGating:bdw
         *
         * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
         * to disable EUTC clock gating.
         */
        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                          DOP_CLOCK_GATING_DISABLE);

        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                          GEN8_SAMPLER_POWER_BYPASS_DIS);

        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          /* WaForceContextSaveRestoreNonCoherent:bdw */
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
                          /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
                          (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}

static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen8_ctx_workarounds_init(engine, wal);

        /* WaDisableThreadStallDopClockGating:chv */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

        /* Improve HiZ throughput on CHV. */
        WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
}

static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
                                      struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        if (HAS_LLC(i915)) {
                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
                 *
                 * Must match Display Engine. See
                 * WaCompressedResourceDisplayNewHashMode.
                 */
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN9_PBE_COMPRESSED_HASH_SELECTION);
                WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
                                  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
        }

        /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          FLOW_CONTROL_ENABLE |
                          PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

        /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
        if (!IS_COFFEELAKE(i915))
                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                                  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);

        /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
        /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
                          GEN9_ENABLE_YV12_BUGFIX |
                          GEN9_ENABLE_GPGPU_PREEMPTION);

        /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
        /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(CACHE_MODE_1,
                          GEN8_4x4_STC_OPTIMIZATION_DISABLE |
                          GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

        /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
        WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
                          GEN9_CCS_TLB_PREFETCH_ENABLE);

        /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
                          HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

        /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
         * both tied to WaForceContextSaveRestoreNonCoherent
         * in some hsds for skl. We keep the tie for all gen9. The
         * documentation is a bit hazy and so we want to get common behaviour,
         * even though there is no clear evidence we would need both on kbl/bxt.
         * This area has been a source of system hangs so we play it safe
         * and mimic the skl regardless of what bspec says.
         *
         * Use Force Non-Coherent whenever executing a 3D context. This
         * is a workaround for a possible hang in the unlikely event
         * a TLB invalidation occurs during a PSD flush.
         */

        /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
        WA_SET_BIT_MASKED(HDC_CHICKEN0,
                          HDC_FORCE_NON_COHERENT);

        /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
        if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
                WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
                                  GEN8_SAMPLER_POWER_BYPASS_DIS);

        /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

        /*
         * Supporting preemption with fine-granularity requires changes in the
         * batch buffer programming. Since we can't break old userspace, we
         * need to set our default preemption level to a safe value. Userspace
         * is still able to use more fine-grained preemption levels, since in
         * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
         * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
         * not real HW workarounds, but merely a way to start using preemption
         * while maintaining the old contract with userspace.
         */

        /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

        /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

        /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
        if (IS_GEN9_LP(i915))
                WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
}

static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
                                struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;
        u8 vals[3] = { 0, 0, 0 };
        unsigned int i;

        for (i = 0; i < 3; i++) {
                u8 ss;

                /*
                 * Only consider slices where one, and only one, subslice has 7
                 * EUs
                 */
                if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
                        continue;

                /*
                 * subslice_7eu[i] != 0 (because of the check above) and
                 * ss_max == 4 (maximum number of subslices possible per slice)
                 *
                 * ->    0 <= ss <= 3;
                 */
                ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
                vals[i] = 3 - ss;
        }

        if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
                return;

        /* Tune IZ hashing. See intel_device_info_runtime_init() */
        WA_SET_FIELD_MASKED(GEN7_GT_MODE,
                            GEN9_IZ_HASHING_MASK(2) |
                            GEN9_IZ_HASHING_MASK(1) |
                            GEN9_IZ_HASHING_MASK(0),
                            GEN9_IZ_HASHING(2, vals[2]) |
                            GEN9_IZ_HASHING(1, vals[1]) |
                            GEN9_IZ_HASHING(0, vals[0]));
}

static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen9_ctx_workarounds_init(engine, wal);
        skl_tune_iz_hashing(engine, wal);
}

static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen9_ctx_workarounds_init(engine, wal);

        /* WaDisableThreadStallDopClockGating:bxt */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
                          STALL_DOP_GATING_DISABLE);

        /* WaToEnableHwFixForPushConstHWBug:bxt */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        gen9_ctx_workarounds_init(engine, wal);

        /* WaToEnableHwFixForPushConstHWBug:kbl */
        if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableSbeCacheDispatchPortSharing:kbl */
        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen9_ctx_workarounds_init(engine, wal);

        /* WaToEnableHwFixForPushConstHWBug:glk */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        gen9_ctx_workarounds_init(engine, wal);

        /* WaToEnableHwFixForPushConstHWBug:cfl */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableSbeCacheDispatchPortSharing:cfl */
        WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
                          GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        /* WaForceContextSaveRestoreNonCoherent:cnl */
        WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
                          HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);

        /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
        if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
                WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);

        /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

        /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
        if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
                WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                                  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);

        /* WaPushConstantDereferenceHoldDisable:cnl */
        WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);

        /* FtrEnableFastAnisoL1BankingFix:cnl */
        WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);

        /* WaDisable3DMidCmdPreemption:cnl */
        WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

        /* WaDisableGPGPUMidCmdPreemption:cnl */
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

        /* WaDisableEarlyEOT:cnl */
        WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
}

static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
                                     struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        /* WaDisableBankHangMode:icl */
        wa_write(wal,
                 GEN8_L3CNTLREG,
                 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
                 GEN8_ERRDETBCTRL);

        /* Wa_1604370585:icl (pre-prod)
         * Formerly known as WaPushConstantDereferenceHoldDisable
         */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                                  PUSH_CONSTANT_DEREF_DISABLE);

        /* WaForceEnableNonCoherent:icl
         * This is not the same workaround as in early Gen9 platforms, where
         * lacking this could cause system hangs, but coherency performance
         * overhead is high and only a few compute workloads really need it
         * (the register is whitelisted in hardware now, so UMDs can opt in
         * for coherency if they have a good reason).
         */
        WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

        /* Wa_2006611047:icl (pre-prod)
         * Formerly known as WaDisableImprovedTdlClkGating
         */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
                                  GEN11_TDL_CLOCK_GATING_FIX_DISABLE);

        /* Wa_2006665173:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
                                  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);

        /* WaEnableFloatBlendOptimization:icl */
        wa_write_masked_or(wal,
                           GEN10_CACHE_MODE_SS,
                           0, /* write-only, so skip validation */
                           _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));

        /* WaDisableGPGPUMidThreadPreemption:icl */
        WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
                            GEN9_PREEMPT_GPGPU_LEVEL_MASK,
                            GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);

        /* allow headerless messages for preemptible GPGPU context */
        WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE,
                          GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
}

static void
__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
                           struct i915_wa_list *wal,
                           const char *name)
{
        struct drm_i915_private *i915 = engine->i915;

        if (engine->class != RENDER_CLASS)
                return;

        wa_init_start(wal, name);

        if (IS_GEN(i915, 11))
                icl_ctx_workarounds_init(engine, wal);
        else if (IS_CANNONLAKE(i915))
                cnl_ctx_workarounds_init(engine, wal);
        else if (IS_COFFEELAKE(i915))
                cfl_ctx_workarounds_init(engine, wal);
        else if (IS_GEMINILAKE(i915))
                glk_ctx_workarounds_init(engine, wal);
        else if (IS_KABYLAKE(i915))
                kbl_ctx_workarounds_init(engine, wal);
        else if (IS_BROXTON(i915))
                bxt_ctx_workarounds_init(engine, wal);
        else if (IS_SKYLAKE(i915))
                skl_ctx_workarounds_init(engine, wal);
        else if (IS_CHERRYVIEW(i915))
                chv_ctx_workarounds_init(engine, wal);
        else if (IS_BROADWELL(i915))
                bdw_ctx_workarounds_init(engine, wal);
        else if (INTEL_GEN(i915) < 8)
                return;
        else
                MISSING_CASE(INTEL_GEN(i915));

        wa_init_finish(wal);
}

void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
{
        __intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
}

int intel_engine_emit_ctx_wa(struct i915_request *rq)
{
        struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
        struct i915_wa *wa;
        unsigned int i;
        u32 *cs;
        int ret;

        if (wal->count == 0)
                return 0;

        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
        if (ret)
                return ret;

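        /*
         * One dword for the LRI header and two per register; the trailing
         * MI_NOOP pads the emission to an even number of dwords
         * (2 * count + 2 in total).
         */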
        cs = intel_ring_begin(rq, wal->count * 2 + 2);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                *cs++ = i915_mmio_reg_offset(wa->reg);
                *cs++ = wa->val;
        }
        *cs++ = MI_NOOP;

        intel_ring_advance(rq, cs);

        ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
        if (ret)
                return ret;

        return 0;
}

static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        /* WaDisableKillLogic:bxt,skl,kbl */
        if (!IS_COFFEELAKE(i915))
                wa_write_or(wal,
                            GAM_ECOCHK,
                            ECOCHK_DIS_TLB);

        if (HAS_LLC(i915)) {
                /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
                 *
                 * Must match Display Engine. See
                 * WaCompressedResourceDisplayNewHashMode.
                 */
                wa_write_or(wal,
                            MMCD_MISC_CTRL,
                            MMCD_PCLA | MMCD_HOTSPOT_EN);
        }

        /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
        wa_write_or(wal,
                    GAM_ECOCHK,
                    BDW_DISABLE_HDC_INVALIDATION);
}

static void
skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableGafsUnitClkGating:skl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:skl */
        if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
                wa_write_or(wal,
                            GEN9_GAMT_ECO_REG_RW_IA,
                            GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaInPlaceDecompressionHang:bxt */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableDynamicCreditSharing:kbl */
        if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
                wa_write_or(wal,
                            GAMT_CHKN_BIT_REG,
                            GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

        /* WaDisableGafsUnitClkGating:kbl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:kbl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);
}

static void
cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        gen9_gt_workarounds_init(i915, wal);

        /* WaDisableGafsUnitClkGating:cfl */
        wa_write_or(wal,
                    GEN7_UCGCTL4,
                    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

        /* WaInPlaceDecompressionHang:cfl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
        u32 mcr_slice_subslice_mask;

        /*
         * WaProgramMgsrForL3BankSpecificMmioReads:cnl,icl
         * L3Banks could be fused off in single slice scenario. If that is
         * the case, we might need to program MCR select to a valid L3Bank
         * by default, to make sure we correctly read certain registers
         * later on (in the range 0xB100 - 0xB3FF).
         * This might be incompatible with
         * WaProgramMgsrForCorrectSliceSpecificMmioReads.
         * Fortunately, this should not happen in production hardware, so
         * we only assert that this is the case (instead of implementing
         * something more complex that requires checking the range of every
         * MMIO read).
         */
        if (INTEL_GEN(i915) >= 10 &&
            is_power_of_2(sseu->slice_mask)) {
                /*
                 * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
                 * enabled subslice, no need to redirect MCR packet
                 */
                u32 slice = fls(sseu->slice_mask);
                u32 fuse3 =
                        intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3);
                u8 ss_mask = sseu->subslice_mask[slice];

                u8 enabled_mask = (ss_mask | ss_mask >>
                                   GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
                u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;

                /*
                 * Production silicon should have matched L3Bank and
                 * subslice enabled
                 */
                WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
        }

        if (INTEL_GEN(i915) >= 11)
                mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
                                          GEN11_MCR_SUBSLICE_MASK;
        else
                mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
                                          GEN8_MCR_SUBSLICE_MASK;
        /*
         * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
         * Before any MMIO read into slice/subslice specific registers, MCR
         * packet control register needs to be programmed to point to any
         * enabled s/ss pair. Otherwise, incorrect values will be returned.
         * This means each subsequent MMIO read will be forwarded to a
         * specific s/ss combination, but this is OK since these registers
         * are consistent across s/ss in almost all cases. In the rare
         * occasions, such as INSTDONE, where this value is dependent
         * on s/ss combo, the read should be done with read_subslice_reg.
         */
        wa_write_masked_or(wal,
                           GEN8_MCR_SELECTOR,
                           mcr_slice_subslice_mask,
                           intel_calculate_mcr_s_ss_select(i915));
}

static void
cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        wa_init_mcr(i915, wal);

        /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
        if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
                wa_write_or(wal,
                            GAMT_CHKN_BIT_REG,
                            GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);

        /* WaInPlaceDecompressionHang:cnl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        wa_init_mcr(i915, wal);

        /* WaInPlaceDecompressionHang:icl */
        wa_write_or(wal,
                    GEN9_GAMT_ECO_REG_RW_IA,
                    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

        /* WaModifyGamTlbPartitioning:icl */
        wa_write_masked_or(wal,
                           GEN11_GACB_PERF_CTRL,
                           GEN11_HASH_CTRL_MASK,
                           GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

        /* Wa_1405766107:icl
         * Formerly known as WaCL2SFHalfMaxAlloc
         */
        wa_write_or(wal,
                    GEN11_LSN_UNSLCVC,
                    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
                    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

        /* Wa_220166154:icl
         * Formerly known as WaDisCtxReload
         */
        wa_write_or(wal,
                    GEN8_GAMW_ECO_DEV_RW_IA,
                    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

        /* Wa_1405779004:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
                wa_write_or(wal,
                            SLICE_UNIT_LEVEL_CLKGATE,
                            MSCUNIT_CLKGATE_DIS);

        /* Wa_1406680159:icl */
        wa_write_or(wal,
                    SUBSLICE_UNIT_LEVEL_CLKGATE,
                    GWUNIT_CLKGATE_DIS);

        /* Wa_1406838659:icl (pre-prod) */
        if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                wa_write_or(wal,
                            INF_UNIT_LEVEL_CLKGATE,
                            CGPSF_CLKGATE_DIS);

        /* Wa_1406463099:icl
         * Formerly known as WaGamTlbPendError
         */
        wa_write_or(wal,
                    GAMT_CHKN_BIT_REG,
                    GAMT_CHKN_DISABLE_L3_COH_PIPE);
}

static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
        if (IS_GEN(i915, 11))
                icl_gt_workarounds_init(i915, wal);
        else if (IS_CANNONLAKE(i915))
                cnl_gt_workarounds_init(i915, wal);
        else if (IS_COFFEELAKE(i915))
                cfl_gt_workarounds_init(i915, wal);
        else if (IS_GEMINILAKE(i915))
                glk_gt_workarounds_init(i915, wal);
        else if (IS_KABYLAKE(i915))
                kbl_gt_workarounds_init(i915, wal);
        else if (IS_BROXTON(i915))
                bxt_gt_workarounds_init(i915, wal);
        else if (IS_SKYLAKE(i915))
                skl_gt_workarounds_init(i915, wal);
        else if (INTEL_GEN(i915) <= 8)
                return;
        else
                MISSING_CASE(INTEL_GEN(i915));
}

void intel_gt_init_workarounds(struct drm_i915_private *i915)
{
        struct i915_wa_list *wal = &i915->gt_wa_list;

        wa_init_start(wal, "GT");
        gt_init_workarounds(i915, wal);
        wa_init_finish(wal);
}

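/*
 * Collect the union of forcewake domains needed to read and write every
 * register in the list, so that a single get/put can bracket the whole
 * rmw loop in wa_list_apply().
 */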
static enum forcewake_domains
wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
        enum forcewake_domains fw = 0;
        struct i915_wa *wa;
        unsigned int i;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                fw |= intel_uncore_forcewake_for_reg(uncore,
                                                     wa->reg,
                                                     FW_REG_READ |
                                                     FW_REG_WRITE);

        return fw;
}

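/*
 * Compare only the bits expected to read back (wa->read) against the
 * expected value; entries added via ignore_wa_write_or() have
 * ->read == 0 and therefore always pass.
 */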
static bool
wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
{
        if ((cur ^ wa->val) & wa->read) {
                DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
                          name, from, i915_mmio_reg_offset(wa->reg),
                          cur, cur & wa->read,
                          wa->val, wa->mask);

                return false;
        }

        return true;
}

static void
wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
        enum forcewake_domains fw;
        unsigned long flags;
        struct i915_wa *wa;
        unsigned int i;

        if (!wal->count)
                return;

        fw = wal_get_fw_for_rmw(uncore, wal);

        spin_lock_irqsave(&uncore->lock, flags);
        intel_uncore_forcewake_get__locked(uncore, fw);

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
                if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                        wa_verify(wa,
                                  intel_uncore_read_fw(uncore, wa->reg),
                                  wal->name, "application");
        }

        intel_uncore_forcewake_put__locked(uncore, fw);
        spin_unlock_irqrestore(&uncore->lock, flags);
}

void intel_gt_apply_workarounds(struct drm_i915_private *i915)
{
        wa_list_apply(&i915->uncore, &i915->gt_wa_list);
}

static bool wa_list_verify(struct intel_uncore *uncore,
                           const struct i915_wa_list *wal,
                           const char *from)
{
        struct i915_wa *wa;
        unsigned int i;
        bool ok = true;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                ok &= wa_verify(wa,
                                intel_uncore_read(uncore, wa->reg),
                                wal->name, from);

        return ok;
}

bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
                                 const char *from)
{
        return wa_list_verify(&i915->uncore, &i915->gt_wa_list, from);
}

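/*
 * Whitelist entries reuse struct i915_wa, with the access flags OR'd
 * directly into unused bits of the register offset; the combined word is
 * later written into a RING_FORCE_TO_NONPRIV slot by
 * intel_engine_apply_whitelist().
 */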
static void
whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
{
        struct i915_wa wa = {
                .reg = reg
        };

        if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
                return;

        wa.reg.reg |= flags;
        _wa_add(wal, &wa);
}

static void
whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
{
        whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_RW);
}

static void gen9_whitelist_build(struct i915_wa_list *w)
{
        /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
        whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

        /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
        whitelist_reg(w, GEN8_CS_CHICKEN1);

        /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
        whitelist_reg(w, GEN8_HDC_CHICKEN1);
}

static void skl_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        if (engine->class != RENDER_CLASS)
                return;

        gen9_whitelist_build(w);

        /* WaDisableLSQCROPERFforOCL:skl */
        whitelist_reg(w, GEN8_L3SQCREG4);
}

static void bxt_whitelist_build(struct intel_engine_cs *engine)
{
        if (engine->class != RENDER_CLASS)
                return;

        gen9_whitelist_build(&engine->whitelist);
}

static void kbl_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        if (engine->class != RENDER_CLASS)
                return;

        gen9_whitelist_build(w);

        /* WaDisableLSQCROPERFforOCL:kbl */
        whitelist_reg(w, GEN8_L3SQCREG4);
}

static void glk_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        if (engine->class != RENDER_CLASS)
                return;

        gen9_whitelist_build(w);

        /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
        whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

static void cfl_whitelist_build(struct intel_engine_cs *engine)
{
        if (engine->class != RENDER_CLASS)
                return;

        gen9_whitelist_build(&engine->whitelist);
}

static void cnl_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        if (engine->class != RENDER_CLASS)
                return;

        /* WaEnablePreemptionGranularityControlByUMD:cnl */
        whitelist_reg(w, GEN8_CS_CHICKEN1);
}

static void icl_whitelist_build(struct intel_engine_cs *engine)
{
        struct i915_wa_list *w = &engine->whitelist;

        switch (engine->class) {
        case RENDER_CLASS:
                /* WaAllowUMDToModifyHalfSliceChicken7:icl */
                whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);

                /* WaAllowUMDToModifySamplerMode:icl */
                whitelist_reg(w, GEN10_SAMPLER_MODE);

                /* WaEnableStateCacheRedirectToCS:icl */
                whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
                break;

        case VIDEO_DECODE_CLASS:
                /* hucStatusRegOffset */
                whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
                                  RING_FORCE_TO_NONPRIV_RD);
                /* hucUKernelHdrInfoRegOffset */
                whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
                                  RING_FORCE_TO_NONPRIV_RD);
                /* hucStatus2RegOffset */
                whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
                                  RING_FORCE_TO_NONPRIV_RD);
                break;

        default:
                break;
        }
}

void intel_engine_init_whitelist(struct intel_engine_cs *engine)
{
        struct drm_i915_private *i915 = engine->i915;
        struct i915_wa_list *w = &engine->whitelist;

        wa_init_start(w, "whitelist");

        if (IS_GEN(i915, 11))
                icl_whitelist_build(engine);
        else if (IS_CANNONLAKE(i915))
                cnl_whitelist_build(engine);
        else if (IS_COFFEELAKE(i915))
                cfl_whitelist_build(engine);
        else if (IS_GEMINILAKE(i915))
                glk_whitelist_build(engine);
        else if (IS_KABYLAKE(i915))
                kbl_whitelist_build(engine);
        else if (IS_BROXTON(i915))
                bxt_whitelist_build(engine);
        else if (IS_SKYLAKE(i915))
                skl_whitelist_build(engine);
        else if (INTEL_GEN(i915) <= 8)
                return;
        else
                MISSING_CASE(INTEL_GEN(i915));

        wa_init_finish(w);
}

void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
{
        const struct i915_wa_list *wal = &engine->whitelist;
        struct intel_uncore *uncore = engine->uncore;
        const u32 base = engine->mmio_base;
        struct i915_wa *wa;
        unsigned int i;

        if (!wal->count)
                return;

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                intel_uncore_write(uncore,
                                   RING_FORCE_TO_NONPRIV(base, i),
                                   i915_mmio_reg_offset(wa->reg));

        /* And clear the rest just in case of garbage */
        for (; i < RING_MAX_NONPRIV_SLOTS; i++)
                intel_uncore_write(uncore,
                                   RING_FORCE_TO_NONPRIV(base, i),
                                   i915_mmio_reg_offset(RING_NOPID(base)));
}

static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        if (IS_GEN(i915, 11)) {
                /* This is not a WA. Enable it for better image quality. */
                wa_masked_en(wal,
                             _3D_CHICKEN3,
                             _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);

                /* WaPipelineFlushCoherentLines:icl */
                ignore_wa_write_or(wal,
                                   GEN8_L3SQCREG4,
                                   GEN8_LQSC_FLUSH_COHERENT_LINES,
                                   GEN8_LQSC_FLUSH_COHERENT_LINES);

                /*
                 * Wa_1405543622:icl
                 * Formerly known as WaGAPZPriorityScheme
                 */
                wa_write_or(wal,
                            GEN8_GARBCNTL,
                            GEN11_ARBITRATION_PRIO_ORDER_MASK);

                /*
                 * Wa_1604223664:icl
                 * Formerly known as WaL3BankAddressHashing
                 */
                wa_write_masked_or(wal,
                                   GEN8_GARBCNTL,
                                   GEN11_HASH_CTRL_EXCL_MASK,
                                   GEN11_HASH_CTRL_EXCL_BIT0);
                wa_write_masked_or(wal,
                                   GEN11_GLBLINVL,
                                   GEN11_BANK_HASH_ADDR_EXCL_MASK,
                                   GEN11_BANK_HASH_ADDR_EXCL_BIT0);

                /*
                 * Wa_1405733216:icl
                 * Formerly known as WaDisableCleanEvicts
                 */
                ignore_wa_write_or(wal,
                                   GEN8_L3SQCREG4,
                                   GEN11_LQSC_CLEAN_EVICT_DISABLE,
                                   GEN11_LQSC_CLEAN_EVICT_DISABLE);

                /* WaForwardProgressSoftReset:icl */
                wa_write_or(wal,
                            GEN10_SCRATCH_LNCF2,
                            PMFLUSHDONE_LNICRSDROP |
                            PMFLUSH_GAPL3UNBLOCK |
                            PMFLUSHDONE_LNEBLK);

                /* Wa_1406609255:icl (pre-prod) */
                if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
                        wa_write_or(wal,
                                    GEN7_SARCHKMD,
                                    GEN7_DISABLE_DEMAND_PREFETCH |
                                    GEN7_DISABLE_SAMPLER_PREFETCH);
        }

        if (IS_GEN_RANGE(i915, 9, 11)) {
                /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
                wa_masked_en(wal,
                             GEN7_FF_SLICE_CS_CHICKEN1,
                             GEN9_FFSC_PERCTX_PREEMPT_CTRL);
        }

        if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
                /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
                wa_write_or(wal,
                            GEN8_GARBCNTL,
                            GEN9_GAPS_TSV_CREDIT_DISABLE);
        }

        if (IS_BROXTON(i915)) {
                /* WaDisablePooledEuLoadBalancingFix:bxt */
                wa_masked_en(wal,
                             FF_SLICE_CS_CHICKEN2,
                             GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
        }

        if (IS_GEN(i915, 9)) {
                /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
                wa_masked_en(wal,
                             GEN9_CSFE_CHICKEN1_RCS,
                             GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);

                /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
                wa_write_or(wal,
                            BDW_SCRATCH1,
                            GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

                /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
                if (IS_GEN9_LP(i915))
                        wa_write_masked_or(wal,
                                           GEN8_L3SQCREG1,
                                           L3_PRIO_CREDITS_MASK,
                                           L3_GENERAL_PRIO_CREDITS(62) |
                                           L3_HIGH_PRIO_CREDITS(2));

                /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
                wa_write_or(wal,
                            GEN8_L3SQCREG4,
                            GEN8_LQSC_FLUSH_COHERENT_LINES);
        }
}

static void
xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
        struct drm_i915_private *i915 = engine->i915;

        /* WaKBLVECSSemaphoreWaitPoll:kbl */
        if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
                wa_write(wal,
                         RING_SEMA_WAIT_POLL(engine->mmio_base),
                         1);
        }
}

static void
engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
        if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
                return;

        if (engine->id == RCS0)
                rcs_engine_wa_init(engine, wal);
        else
                xcs_engine_wa_init(engine, wal);
}

void intel_engine_init_workarounds(struct intel_engine_cs *engine)
{
        struct i915_wa_list *wal = &engine->wa_list;

        if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8))
                return;

        wa_init_start(wal, engine->name);
        engine_init_workarounds(engine, wal);
        wa_init_finish(wal);
}

void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
{
        wa_list_apply(engine->uncore, &engine->wa_list);
}

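/*
 * Allocate and pin a page-aligned scratch buffer large enough for @count
 * dwords of MI_STORE_REGISTER_MEM results.
 */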
static struct i915_vma *
create_scratch(struct i915_address_space *vm, int count)
{
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        unsigned int size;
        int err;

        size = round_up(count * sizeof(u32), PAGE_SIZE);
        obj = i915_gem_object_create_internal(vm->i915, size);
        if (IS_ERR(obj))
                return ERR_CAST(obj);

        i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

        vma = i915_vma_instance(obj, vm, NULL);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto err_obj;
        }

        err = i915_vma_pin(vma, 0, 0,
                           i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
        if (err)
                goto err_obj;

        return vma;

err_obj:
        i915_gem_object_put(obj);
        return ERR_PTR(err);
}

static int
wa_list_srm(struct i915_request *rq,
            const struct i915_wa_list *wal,
            struct i915_vma *vma)
{
        const struct i915_wa *wa;
        unsigned int i;
        u32 srm, *cs;

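        /*
         * On gen8+ MI_STORE_REGISTER_MEM takes a 64-bit address and is one
         * dword longer; incrementing the opcode bumps its embedded length
         * field accordingly.
         */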
        srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
        if (INTEL_GEN(rq->i915) >= 8)
                srm++;

        cs = intel_ring_begin(rq, 4 * wal->count);
        if (IS_ERR(cs))
                return PTR_ERR(cs);

        for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
                *cs++ = srm;
                *cs++ = i915_mmio_reg_offset(wa->reg);
                *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
                *cs++ = 0;
        }
        intel_ring_advance(rq, cs);

        return 0;
}

static int engine_wa_list_verify(struct intel_context *ce,
                                 const struct i915_wa_list * const wal,
                                 const char *from)
{
        const struct i915_wa *wa;
        struct i915_request *rq;
        struct i915_vma *vma;
        unsigned int i;
        u32 *results;
        int err;

        if (!wal->count)
                return 0;

        vma = create_scratch(&ce->engine->i915->ggtt.vm, wal->count);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        rq = intel_context_create_request(ce);
        if (IS_ERR(rq)) {
                err = PTR_ERR(rq);
                goto err_vma;
        }

        err = wa_list_srm(rq, wal, vma);
        if (err)
                goto err_vma;

        i915_request_add(rq);
        if (i915_request_wait(rq, 0, HZ / 5) < 0) {
                err = -ETIME;
                goto err_vma;
        }

        results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
        if (IS_ERR(results)) {
                err = PTR_ERR(results);
                goto err_vma;
        }

        err = 0;
        for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
                if (!wa_verify(wa, results[i], wal->name, from))
                        err = -ENXIO;

        i915_gem_object_unpin_map(vma->obj);

err_vma:
        i915_vma_unpin(vma);
        i915_vma_put(vma);
        return err;
}

int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
                                    const char *from)
{
        return engine_wa_list_verify(engine->kernel_context,
                                     &engine->wa_list,
                                     from);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_workarounds.c"
#endif