Revert "drm/i915: Expand subslice mask"
drivers/gpu/drm/i915/gt/intel_workarounds.c
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2014-2018 Intel Corporation
5  */
6
7 #include "i915_drv.h"
8 #include "intel_context.h"
9 #include "intel_workarounds.h"
10
11 /**
12  * DOC: Hardware workarounds
13  *
14  * This file is intended as a central place to implement most [1]_ of the
15  * required workarounds for hardware to work as originally intended. They fall
16  * in five basic categories depending on how/when they are applied:
17  *
18  * - Workarounds that touch registers that are saved/restored to/from the HW
19  *   context image. The list is emitted (via Load Register Immediate commands)
20  *   every time a new context is created.
21  * - GT workarounds. The list of these WAs is applied whenever these registers
22  *   revert to default values (on GPU reset, suspend/resume [2]_, etc..).
23  * - Display workarounds. The list is applied during display clock-gating
24  *   initialization.
25  * - Workarounds that whitelist a privileged register, so that UMDs can manage
26  *   them directly. This is just a special case of an MMIO workaround (as we
27  *   write the list of these to-be-whitelisted registers to some special HW
28  *   registers).
29  * - Workaround batchbuffers, that get executed automatically by the hardware
30  *   on every HW context restore.
31  *
32  * .. [1] Please notice that there are other WAs that, due to their nature,
33  *    cannot be applied from a central place. Those are peppered around the rest
34  *    of the code, as needed.
35  *
36  * .. [2] Technically, some registers are powercontext saved & restored, so they
37  *    survive a suspend/resume. In practice, writing them again is not too
38  *    costly and simplifies things. We can revisit this in the future.
39  *
40  * Layout
41  * ''''''
42  *
43  * Keep things in this file ordered by WA type, as per the above (context, GT,
44  * display, register whitelist, batchbuffer). Then, inside each type, keep the
45  * following order:
46  *
47  * - Infrastructure functions and macros
48  * - WAs per platform in standard gen/chrono order
49  * - Public functions to init or apply the given workaround type.
50  */
51
52 static void wa_init_start(struct i915_wa_list *wal, const char *name)
53 {
54         wal->name = name;
55 }
56
57 #define WA_LIST_CHUNK (1 << 4)
58
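/*
 * Trim the chunked allocation made by _wa_add() below down to the number
 * of entries actually used, then report how many workarounds were set up.
 */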
59 static void wa_init_finish(struct i915_wa_list *wal)
60 {
61         /* Trim unused entries. */
62         if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
63                 struct i915_wa *list = kmemdup(wal->list,
64                                                wal->count * sizeof(*list),
65                                                GFP_KERNEL);
66
67                 if (list) {
68                         kfree(wal->list);
69                         wal->list = list;
70                 }
71         }
72
73         if (!wal->count)
74                 return;
75
76         DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
77                          wal->wa_count, wal->name);
78 }
79
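/*
 * Add a workaround to the list, keeping the list sorted by register offset.
 * The backing array grows in WA_LIST_CHUNK-sized steps. If an entry for the
 * same register already exists, the new mask/value is merged into it
 * (complaining if that would overwrite previously requested bits); otherwise
 * the entry is appended and bubbled into its sorted position.
 */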
80 static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
81 {
82         unsigned int addr = i915_mmio_reg_offset(wa->reg);
83         unsigned int start = 0, end = wal->count;
84         const unsigned int grow = WA_LIST_CHUNK;
85         struct i915_wa *wa_;
86
87         GEM_BUG_ON(!is_power_of_2(grow));
88
89         if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
90                 struct i915_wa *list;
91
92                 list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
93                                      GFP_KERNEL);
94                 if (!list) {
95                         DRM_ERROR("No space for workaround init!\n");
96                         return;
97                 }
98
99                 if (wal->list)
100                         memcpy(list, wal->list, sizeof(*wa) * wal->count);
101
102                 wal->list = list;
103         }
104
105         while (start < end) {
106                 unsigned int mid = start + (end - start) / 2;
107
108                 if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
109                         start = mid + 1;
110                 } else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
111                         end = mid;
112                 } else {
113                         wa_ = &wal->list[mid];
114
115                         if ((wa->mask & ~wa_->mask) == 0) {
116                                 DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
117                                           i915_mmio_reg_offset(wa_->reg),
118                                           wa_->mask, wa_->val);
119
120                                 wa_->val &= ~wa->mask;
121                         }
122
123                         wal->wa_count++;
124                         wa_->val |= wa->val;
125                         wa_->mask |= wa->mask;
126                         wa_->read |= wa->read;
127                         return;
128                 }
129         }
130
131         wal->wa_count++;
132         wa_ = &wal->list[wal->count++];
133         *wa_ = *wa;
134
135         while (wa_-- > wal->list) {
136                 GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
137                            i915_mmio_reg_offset(wa_[1].reg));
138                 if (i915_mmio_reg_offset(wa_[1].reg) >
139                     i915_mmio_reg_offset(wa_[0].reg))
140                         break;
141
142                 swap(wa_[1], wa_[0]);
143         }
144 }
145
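/*
 * Record a register update: @mask names the bits being changed (and later
 * checked by wa_verify()), while @val holds the new value for those bits.
 */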
146 static void
147 wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
148                    u32 val)
149 {
150         struct i915_wa wa = {
151                 .reg  = reg,
152                 .mask = mask,
153                 .val  = val,
154                 .read = mask,
155         };
156
157         _wa_add(wal, &wa);
158 }
159
160 static void
161 wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
162 {
163         wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
164 }
165
166 static void
167 wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
168 {
169         wa_write_masked_or(wal, reg, ~0, val);
170 }
171
172 static void
173 wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
174 {
175         wa_write_masked_or(wal, reg, val, val);
176 }
177
178 static void
179 ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)
180 {
181         struct i915_wa wa = {
182                 .reg  = reg,
183                 .mask = mask,
184                 .val  = val,
185                 /* Bonkers HW, skip verifying */
186         };
187
188         _wa_add(wal, &wa);
189 }
190
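/*
 * The WA_*_MASKED() macros below are for registers with masked-write
 * semantics (the upper 16 bits select which of the lower bits take effect)
 * and expect a local i915_wa_list pointer named 'wal' to be in scope.
 */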
191 #define WA_SET_BIT_MASKED(addr, mask) \
192         wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))
193
194 #define WA_CLR_BIT_MASKED(addr, mask) \
195         wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))
196
197 #define WA_SET_FIELD_MASKED(addr, mask, value) \
198         wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))
199
200 static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
201                                       struct i915_wa_list *wal)
202 {
203         WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
204
205         /* WaDisableAsyncFlipPerfMode:bdw,chv */
206         WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
207
208         /* WaDisablePartialInstShootdown:bdw,chv */
209         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
210                           PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
211
212         /* Use Force Non-Coherent whenever executing a 3D context. This is a
213          * workaround for a possible hang in the unlikely event a TLB
214          * invalidation occurs during a PSD flush.
215          */
216         /* WaForceEnableNonCoherent:bdw,chv */
217         /* WaHdcDisableFetchWhenMasked:bdw,chv */
218         WA_SET_BIT_MASKED(HDC_CHICKEN0,
219                           HDC_DONOT_FETCH_MEM_WHEN_MASKED |
220                           HDC_FORCE_NON_COHERENT);
221
222         /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
223          * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
224          *  polygons in the same 8x4 pixel/sample area to be processed without
225          *  stalling waiting for the earlier ones to write to Hierarchical Z
226          *  buffer."
227          *
228          * This optimization is off by default for BDW and CHV; turn it on.
229          */
230         WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
231
232         /* Wa4x4STCOptimizationDisable:bdw,chv */
233         WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
234
235         /*
236          * BSpec recommends 8x4 when MSAA is used,
237          * however in practice 16x4 seems fastest.
238          *
239          * Note that PS/WM thread counts depend on the WIZ hashing
240          * disable bit, which we don't touch here, but it's good
241          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
242          */
243         WA_SET_FIELD_MASKED(GEN7_GT_MODE,
244                             GEN6_WIZ_HASHING_MASK,
245                             GEN6_WIZ_HASHING_16x4);
246 }
247
248 static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
249                                      struct i915_wa_list *wal)
250 {
251         struct drm_i915_private *i915 = engine->i915;
252
253         gen8_ctx_workarounds_init(engine, wal);
254
255         /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
256         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
257
258         /* WaDisableDopClockGating:bdw
259          *
260          * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
261          * to disable EUTC clock gating.
262          */
263         WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
264                           DOP_CLOCK_GATING_DISABLE);
265
266         WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
267                           GEN8_SAMPLER_POWER_BYPASS_DIS);
268
269         WA_SET_BIT_MASKED(HDC_CHICKEN0,
270                           /* WaForceContextSaveRestoreNonCoherent:bdw */
271                           HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
272                           /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
273                           (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
274 }
275
276 static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
277                                      struct i915_wa_list *wal)
278 {
279         gen8_ctx_workarounds_init(engine, wal);
280
281         /* WaDisableThreadStallDopClockGating:chv */
282         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
283
284         /* Improve HiZ throughput on CHV. */
285         WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
286 }
287
288 static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
289                                       struct i915_wa_list *wal)
290 {
291         struct drm_i915_private *i915 = engine->i915;
292
293         if (HAS_LLC(i915)) {
294                 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
295                  *
296                  * Must match Display Engine. See
297                  * WaCompressedResourceDisplayNewHashMode.
298                  */
299                 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
300                                   GEN9_PBE_COMPRESSED_HASH_SELECTION);
301                 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
302                                   GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
303         }
304
305         /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
306         /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
307         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
308                           FLOW_CONTROL_ENABLE |
309                           PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
310
311         /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
312         if (!IS_COFFEELAKE(i915))
313                 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
314                                   GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
315
316         /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
317         /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
318         WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
319                           GEN9_ENABLE_YV12_BUGFIX |
320                           GEN9_ENABLE_GPGPU_PREEMPTION);
321
322         /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
323         /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
324         WA_SET_BIT_MASKED(CACHE_MODE_1,
325                           GEN8_4x4_STC_OPTIMIZATION_DISABLE |
326                           GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
327
328         /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
329         WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
330                           GEN9_CCS_TLB_PREFETCH_ENABLE);
331
332         /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
333         WA_SET_BIT_MASKED(HDC_CHICKEN0,
334                           HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
335                           HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
336
337         /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
338          * both tied to WaForceContextSaveRestoreNonCoherent
339          * in some hsds for skl. We keep the tie for all gen9. The
340          * documentation is a bit hazy and so we want to get common behaviour,
341          * even though there is no clear evidence we would need both on kbl/bxt.
342          * This area has been a source of system hangs, so we play it safe
343          * and mimic the skl regardless of what bspec says.
344          *
345          * Use Force Non-Coherent whenever executing a 3D context. This
346          * is a workaround for a possible hang in the unlikely event
347          * a TLB invalidation occurs during a PSD flush.
348          */
349
350         /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
351         WA_SET_BIT_MASKED(HDC_CHICKEN0,
352                           HDC_FORCE_NON_COHERENT);
353
354         /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
355         if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
356                 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
357                                   GEN8_SAMPLER_POWER_BYPASS_DIS);
358
359         /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
360         WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
361
362         /*
363          * Supporting preemption with fine-granularity requires changes in the
364          * batch buffer programming. Since we can't break old userspace, we
365          * need to set our default preemption level to safe value. Userspace is
366          * still able to use more fine-grained preemption levels, since in
367          * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
368          * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
369          * not real HW workarounds, but merely a way to start using preemption
370          * while maintaining old contract with userspace.
371          */
372
373         /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
374         WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
375
376         /* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
377         WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
378                             GEN9_PREEMPT_GPGPU_LEVEL_MASK,
379                             GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
380
381         /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
382         if (IS_GEN9_LP(i915))
383                 WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
384 }
385
386 static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
387                                 struct i915_wa_list *wal)
388 {
389         struct drm_i915_private *i915 = engine->i915;
390         u8 vals[3] = { 0, 0, 0 };
391         unsigned int i;
392
393         for (i = 0; i < 3; i++) {
394                 u8 ss;
395
396                 /*
397                  * Only consider slices where one, and only one, subslice has 7
398                  * EUs
399                  */
400                 if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
401                         continue;
402
403                 /*
404                  * subslice_7eu[i] != 0 (because of the check above) and
405                  * ss_max == 4 (maximum number of subslices possible per slice)
406                  *
407                  * ->    0 <= ss <= 3;
408                  */
409                 ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
410                 vals[i] = 3 - ss;
411         }
412
413         if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
414                 return;
415
416         /* Tune IZ hashing. See intel_device_info_runtime_init() */
417         WA_SET_FIELD_MASKED(GEN7_GT_MODE,
418                             GEN9_IZ_HASHING_MASK(2) |
419                             GEN9_IZ_HASHING_MASK(1) |
420                             GEN9_IZ_HASHING_MASK(0),
421                             GEN9_IZ_HASHING(2, vals[2]) |
422                             GEN9_IZ_HASHING(1, vals[1]) |
423                             GEN9_IZ_HASHING(0, vals[0]));
424 }
425
426 static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
427                                      struct i915_wa_list *wal)
428 {
429         gen9_ctx_workarounds_init(engine, wal);
430         skl_tune_iz_hashing(engine, wal);
431 }
432
433 static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
434                                      struct i915_wa_list *wal)
435 {
436         gen9_ctx_workarounds_init(engine, wal);
437
438         /* WaDisableThreadStallDopClockGating:bxt */
439         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
440                           STALL_DOP_GATING_DISABLE);
441
442         /* WaToEnableHwFixForPushConstHWBug:bxt */
443         WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
444                           GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
445 }
446
447 static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
448                                      struct i915_wa_list *wal)
449 {
450         struct drm_i915_private *i915 = engine->i915;
451
452         gen9_ctx_workarounds_init(engine, wal);
453
454         /* WaToEnableHwFixForPushConstHWBug:kbl */
455         if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
456                 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
457                                   GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
458
459         /* WaDisableSbeCacheDispatchPortSharing:kbl */
460         WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
461                           GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
462 }
463
464 static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
465                                      struct i915_wa_list *wal)
466 {
467         gen9_ctx_workarounds_init(engine, wal);
468
469         /* WaToEnableHwFixForPushConstHWBug:glk */
470         WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
471                           GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
472 }
473
474 static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
475                                      struct i915_wa_list *wal)
476 {
477         gen9_ctx_workarounds_init(engine, wal);
478
479         /* WaToEnableHwFixForPushConstHWBug:cfl */
480         WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
481                           GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
482
483         /* WaDisableSbeCacheDispatchPortSharing:cfl */
484         WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
485                           GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
486 }
487
488 static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
489                                      struct i915_wa_list *wal)
490 {
491         struct drm_i915_private *i915 = engine->i915;
492
493         /* WaForceContextSaveRestoreNonCoherent:cnl */
494         WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
495                           HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
496
497         /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
498         if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
499                 WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);
500
501         /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
502         WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
503                           GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
504
505         /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
506         if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
507                 WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
508                                   GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
509
510         /* WaPushConstantDereferenceHoldDisable:cnl */
511         WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
512
513         /* FtrEnableFastAnisoL1BankingFix:cnl */
514         WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
515
516         /* WaDisable3DMidCmdPreemption:cnl */
517         WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
518
519         /* WaDisableGPGPUMidCmdPreemption:cnl */
520         WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
521                             GEN9_PREEMPT_GPGPU_LEVEL_MASK,
522                             GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
523
524         /* WaDisableEarlyEOT:cnl */
525         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
526 }
527
528 static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
529                                      struct i915_wa_list *wal)
530 {
531         struct drm_i915_private *i915 = engine->i915;
532
533         /* WaDisableBankHangMode:icl */
534         wa_write(wal,
535                  GEN8_L3CNTLREG,
536                  intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
537                  GEN8_ERRDETBCTRL);
538
539         /* Wa_1604370585:icl (pre-prod)
540          * Formerly known as WaPushConstantDereferenceHoldDisable
541          */
542         if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
543                 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
544                                   PUSH_CONSTANT_DEREF_DISABLE);
545
546         /* WaForceEnableNonCoherent:icl
547          * This is not the same workaround as in early Gen9 platforms, where
548          * lacking this could cause system hangs, but coherency performance
549          * overhead is high and only a few compute workloads really need it
550          * (the register is whitelisted in hardware now, so UMDs can opt in
551          * for coherency if they have a good reason).
552          */
553         WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
554
555         /* Wa_2006611047:icl (pre-prod)
556          * Formerly known as WaDisableImprovedTdlClkGating
557          */
558         if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
559                 WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
560                                   GEN11_TDL_CLOCK_GATING_FIX_DISABLE);
561
562         /* Wa_2006665173:icl (pre-prod) */
563         if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
564                 WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
565                                   GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
566
567         /* WaEnableFloatBlendOptimization:icl */
568         wa_write_masked_or(wal,
569                            GEN10_CACHE_MODE_SS,
570                            0, /* write-only, so skip validation */
571                            _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
572
573         /* WaDisableGPGPUMidThreadPreemption:icl */
574         WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
575                             GEN9_PREEMPT_GPGPU_LEVEL_MASK,
576                             GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
577
578         /* allow headerless messages for preemptible GPGPU context */
579         WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE,
580                           GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
581 }
582
583 static void
584 __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
585                            struct i915_wa_list *wal,
586                            const char *name)
587 {
588         struct drm_i915_private *i915 = engine->i915;
589
590         if (engine->class != RENDER_CLASS)
591                 return;
592
593         wa_init_start(wal, name);
594
595         if (IS_GEN(i915, 11))
596                 icl_ctx_workarounds_init(engine, wal);
597         else if (IS_CANNONLAKE(i915))
598                 cnl_ctx_workarounds_init(engine, wal);
599         else if (IS_COFFEELAKE(i915))
600                 cfl_ctx_workarounds_init(engine, wal);
601         else if (IS_GEMINILAKE(i915))
602                 glk_ctx_workarounds_init(engine, wal);
603         else if (IS_KABYLAKE(i915))
604                 kbl_ctx_workarounds_init(engine, wal);
605         else if (IS_BROXTON(i915))
606                 bxt_ctx_workarounds_init(engine, wal);
607         else if (IS_SKYLAKE(i915))
608                 skl_ctx_workarounds_init(engine, wal);
609         else if (IS_CHERRYVIEW(i915))
610                 chv_ctx_workarounds_init(engine, wal);
611         else if (IS_BROADWELL(i915))
612                 bdw_ctx_workarounds_init(engine, wal);
613         else if (INTEL_GEN(i915) < 8)
614                 return;
615         else
616                 MISSING_CASE(INTEL_GEN(i915));
617
618         wa_init_finish(wal);
619 }
620
621 void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
622 {
623         __intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
624 }
625
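/*
 * Emit the engine's context workaround list into @rq as a single
 * MI_LOAD_REGISTER_IMM block, bracketed by flushes, so the values end up
 * recorded in the new context image (see the DOC comment above).
 */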
626 int intel_engine_emit_ctx_wa(struct i915_request *rq)
627 {
628         struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
629         struct i915_wa *wa;
630         unsigned int i;
631         u32 *cs;
632         int ret;
633
634         if (wal->count == 0)
635                 return 0;
636
637         ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
638         if (ret)
639                 return ret;
640
641         cs = intel_ring_begin(rq, (wal->count * 2 + 2));
642         if (IS_ERR(cs))
643                 return PTR_ERR(cs);
644
645         *cs++ = MI_LOAD_REGISTER_IMM(wal->count);
646         for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
647                 *cs++ = i915_mmio_reg_offset(wa->reg);
648                 *cs++ = wa->val;
649         }
650         *cs++ = MI_NOOP;
651
652         intel_ring_advance(rq, cs);
653
654         ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
655         if (ret)
656                 return ret;
657
658         return 0;
659 }
660
661 static void
662 gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
663 {
664         /* WaDisableKillLogic:bxt,skl,kbl */
665         if (!IS_COFFEELAKE(i915))
666                 wa_write_or(wal,
667                             GAM_ECOCHK,
668                             ECOCHK_DIS_TLB);
669
670         if (HAS_LLC(i915)) {
671                 /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
672                  *
673                  * Must match Display Engine. See
674                  * WaCompressedResourceDisplayNewHashMode.
675                  */
676                 wa_write_or(wal,
677                             MMCD_MISC_CTRL,
678                             MMCD_PCLA | MMCD_HOTSPOT_EN);
679         }
680
681         /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
682         wa_write_or(wal,
683                     GAM_ECOCHK,
684                     BDW_DISABLE_HDC_INVALIDATION);
685 }
686
687 static void
688 skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
689 {
690         gen9_gt_workarounds_init(i915, wal);
691
692         /* WaDisableGafsUnitClkGating:skl */
693         wa_write_or(wal,
694                     GEN7_UCGCTL4,
695                     GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
696
697         /* WaInPlaceDecompressionHang:skl */
698         if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
699                 wa_write_or(wal,
700                             GEN9_GAMT_ECO_REG_RW_IA,
701                             GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
702 }
703
704 static void
705 bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
706 {
707         gen9_gt_workarounds_init(i915, wal);
708
709         /* WaInPlaceDecompressionHang:bxt */
710         wa_write_or(wal,
711                     GEN9_GAMT_ECO_REG_RW_IA,
712                     GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
713 }
714
715 static void
716 kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
717 {
718         gen9_gt_workarounds_init(i915, wal);
719
720         /* WaDisableDynamicCreditSharing:kbl */
721         if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
722                 wa_write_or(wal,
723                             GAMT_CHKN_BIT_REG,
724                             GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
725
726         /* WaDisableGafsUnitClkGating:kbl */
727         wa_write_or(wal,
728                     GEN7_UCGCTL4,
729                     GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
730
731         /* WaInPlaceDecompressionHang:kbl */
732         wa_write_or(wal,
733                     GEN9_GAMT_ECO_REG_RW_IA,
734                     GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
735 }
736
737 static void
738 glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
739 {
740         gen9_gt_workarounds_init(i915, wal);
741 }
742
743 static void
744 cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
745 {
746         gen9_gt_workarounds_init(i915, wal);
747
748         /* WaDisableGafsUnitClkGating:cfl */
749         wa_write_or(wal,
750                     GEN7_UCGCTL4,
751                     GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
752
753         /* WaInPlaceDecompressionHang:cfl */
754         wa_write_or(wal,
755                     GEN9_GAMT_ECO_REG_RW_IA,
756                     GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
757 }
758
759 static void
760 wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
761 {
762         const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
763         u32 mcr_slice_subslice_mask;
764
765         /*
766          * WaProgramMgsrForL3BankSpecificMmioReads:cnl,icl
767          * L3Banks could be fused off in single slice scenario. If that is
768          * the case, we might need to program MCR select to a valid L3Bank
769          * by default, to make sure we correctly read certain registers
770          * later on (in the range 0xB100 - 0xB3FF).
771          * This might be incompatible with
772          * WaProgramMgsrForCorrectSliceSpecificMmioReads.
773          * Fortunately, this should not happen in production hardware, so
774          * we only assert that this is the case (instead of implementing
775          * something more complex that requires checking the range of every
776          * MMIO read).
777          */
778         if (INTEL_GEN(i915) >= 10 &&
779             is_power_of_2(sseu->slice_mask)) {
780                 /*
781                  * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
782                  * enabled subslice, no need to redirect MCR packet
783                  */
784                 u32 slice = fls(sseu->slice_mask);
785                 u32 fuse3 =
786                         intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3);
787                 u8 ss_mask = sseu->subslice_mask[slice];
788
789                 u8 enabled_mask = (ss_mask | ss_mask >>
790                                    GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
791                 u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;
792
793                 /*
794                  * Production silicon should have matched L3Bank and
795                  * subslice enabled
796                  */
797                 WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
798         }
799
800         if (INTEL_GEN(i915) >= 11)
801                 mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
802                                           GEN11_MCR_SUBSLICE_MASK;
803         else
804                 mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
805                                           GEN8_MCR_SUBSLICE_MASK;
806         /*
807          * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
808          * Before any MMIO read into slice/subslice specific registers, MCR
809          * packet control register needs to be programmed to point to any
810          * enabled s/ss pair. Otherwise, incorrect values will be returned.
811          * This means each subsequent MMIO read will be forwarded to a
812          * specific s/ss combination, but this is OK since these registers
813          * are consistent across s/ss in almost all cases. In the rare
814          * occasions, such as INSTDONE, where this value is dependent
815          * on s/ss combo, the read should be done with read_subslice_reg.
816          */
817         wa_write_masked_or(wal,
818                            GEN8_MCR_SELECTOR,
819                            mcr_slice_subslice_mask,
820                            intel_calculate_mcr_s_ss_select(i915));
821 }
822
823 static void
824 cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
825 {
826         wa_init_mcr(i915, wal);
827
828         /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
829         if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
830                 wa_write_or(wal,
831                             GAMT_CHKN_BIT_REG,
832                             GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);
833
834         /* WaInPlaceDecompressionHang:cnl */
835         wa_write_or(wal,
836                     GEN9_GAMT_ECO_REG_RW_IA,
837                     GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
838 }
839
840 static void
841 icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
842 {
843         wa_init_mcr(i915, wal);
844
845         /* WaInPlaceDecompressionHang:icl */
846         wa_write_or(wal,
847                     GEN9_GAMT_ECO_REG_RW_IA,
848                     GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
849
850         /* WaModifyGamTlbPartitioning:icl */
851         wa_write_masked_or(wal,
852                            GEN11_GACB_PERF_CTRL,
853                            GEN11_HASH_CTRL_MASK,
854                            GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
855
856         /* Wa_1405766107:icl
857          * Formerly known as WaCL2SFHalfMaxAlloc
858          */
859         wa_write_or(wal,
860                     GEN11_LSN_UNSLCVC,
861                     GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
862                     GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);
863
864         /* Wa_220166154:icl
865          * Formerly known as WaDisCtxReload
866          */
867         wa_write_or(wal,
868                     GEN8_GAMW_ECO_DEV_RW_IA,
869                     GAMW_ECO_DEV_CTX_RELOAD_DISABLE);
870
871         /* Wa_1405779004:icl (pre-prod) */
872         if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
873                 wa_write_or(wal,
874                             SLICE_UNIT_LEVEL_CLKGATE,
875                             MSCUNIT_CLKGATE_DIS);
876
877         /* Wa_1406680159:icl */
878         wa_write_or(wal,
879                     SUBSLICE_UNIT_LEVEL_CLKGATE,
880                     GWUNIT_CLKGATE_DIS);
881
882         /* Wa_1406838659:icl (pre-prod) */
883         if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
884                 wa_write_or(wal,
885                             INF_UNIT_LEVEL_CLKGATE,
886                             CGPSF_CLKGATE_DIS);
887
888         /* Wa_1406463099:icl
889          * Formerly known as WaGamTlbPendError
890          */
891         wa_write_or(wal,
892                     GAMT_CHKN_BIT_REG,
893                     GAMT_CHKN_DISABLE_L3_COH_PIPE);
894 }
895
896 static void
897 gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
898 {
899         if (IS_GEN(i915, 11))
900                 icl_gt_workarounds_init(i915, wal);
901         else if (IS_CANNONLAKE(i915))
902                 cnl_gt_workarounds_init(i915, wal);
903         else if (IS_COFFEELAKE(i915))
904                 cfl_gt_workarounds_init(i915, wal);
905         else if (IS_GEMINILAKE(i915))
906                 glk_gt_workarounds_init(i915, wal);
907         else if (IS_KABYLAKE(i915))
908                 kbl_gt_workarounds_init(i915, wal);
909         else if (IS_BROXTON(i915))
910                 bxt_gt_workarounds_init(i915, wal);
911         else if (IS_SKYLAKE(i915))
912                 skl_gt_workarounds_init(i915, wal);
913         else if (INTEL_GEN(i915) <= 8)
914                 return;
915         else
916                 MISSING_CASE(INTEL_GEN(i915));
917 }
918
919 void intel_gt_init_workarounds(struct drm_i915_private *i915)
920 {
921         struct i915_wa_list *wal = &i915->gt_wa_list;
922
923         wa_init_start(wal, "GT");
924         gt_init_workarounds(i915, wal);
925         wa_init_finish(wal);
926 }
927
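/*
 * Collect the union of forcewake domains needed to read and write every
 * register in the list, so wa_list_apply() can take them in one go.
 */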
928 static enum forcewake_domains
929 wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
930 {
931         enum forcewake_domains fw = 0;
932         struct i915_wa *wa;
933         unsigned int i;
934
935         for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
936                 fw |= intel_uncore_forcewake_for_reg(uncore,
937                                                      wa->reg,
938                                                      FW_REG_READ |
939                                                      FW_REG_WRITE);
940
941         return fw;
942 }
943
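/*
 * Check that the bits we expect to read back (wa->read) still match the
 * programmed value; log and return false if the workaround was lost.
 */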
944 static bool
945 wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
946 {
947         if ((cur ^ wa->val) & wa->read) {
948                 DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
949                           name, from, i915_mmio_reg_offset(wa->reg),
950                           cur, cur & wa->read,
951                           wa->val, wa->mask);
952
953                 return false;
954         }
955
956         return true;
957 }
958
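/*
 * Apply the whole list under the uncore lock with forcewake held: one
 * read-modify-write per entry, with an immediate readback check on
 * CONFIG_DRM_I915_DEBUG_GEM builds.
 */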
959 static void
960 wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
961 {
962         enum forcewake_domains fw;
963         unsigned long flags;
964         struct i915_wa *wa;
965         unsigned int i;
966
967         if (!wal->count)
968                 return;
969
970         fw = wal_get_fw_for_rmw(uncore, wal);
971
972         spin_lock_irqsave(&uncore->lock, flags);
973         intel_uncore_forcewake_get__locked(uncore, fw);
974
975         for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
976                 intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
977                 if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
978                         wa_verify(wa,
979                                   intel_uncore_read_fw(uncore, wa->reg),
980                                   wal->name, "application");
981         }
982
983         intel_uncore_forcewake_put__locked(uncore, fw);
984         spin_unlock_irqrestore(&uncore->lock, flags);
985 }
986
987 void intel_gt_apply_workarounds(struct drm_i915_private *i915)
988 {
989         wa_list_apply(&i915->uncore, &i915->gt_wa_list);
990 }
991
992 static bool wa_list_verify(struct intel_uncore *uncore,
993                            const struct i915_wa_list *wal,
994                            const char *from)
995 {
996         struct i915_wa *wa;
997         unsigned int i;
998         bool ok = true;
999
1000         for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1001                 ok &= wa_verify(wa,
1002                                 intel_uncore_read(uncore, wa->reg),
1003                                 wal->name, from);
1004
1005         return ok;
1006 }
1007
1008 bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
1009                                  const char *from)
1010 {
1011         return wa_list_verify(&i915->uncore, &i915->gt_wa_list, from);
1012 }
1013
1014 static void
1015 whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
1016 {
1017         struct i915_wa wa = {
1018                 .reg = reg
1019         };
1020
1021         if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
1022                 return;
1023
1024         _wa_add(wal, &wa);
1025 }
1026
1027 static void gen9_whitelist_build(struct i915_wa_list *w)
1028 {
1029         /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
1030         whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
1031
1032         /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
1033         whitelist_reg(w, GEN8_CS_CHICKEN1);
1034
1035         /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
1036         whitelist_reg(w, GEN8_HDC_CHICKEN1);
1037 }
1038
1039 static void skl_whitelist_build(struct i915_wa_list *w)
1040 {
1041         gen9_whitelist_build(w);
1042
1043         /* WaDisableLSQCROPERFforOCL:skl */
1044         whitelist_reg(w, GEN8_L3SQCREG4);
1045 }
1046
1047 static void bxt_whitelist_build(struct i915_wa_list *w)
1048 {
1049         gen9_whitelist_build(w);
1050 }
1051
1052 static void kbl_whitelist_build(struct i915_wa_list *w)
1053 {
1054         gen9_whitelist_build(w);
1055
1056         /* WaDisableLSQCROPERFforOCL:kbl */
1057         whitelist_reg(w, GEN8_L3SQCREG4);
1058 }
1059
1060 static void glk_whitelist_build(struct i915_wa_list *w)
1061 {
1062         gen9_whitelist_build(w);
1063
1064         /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
1065         whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1066 }
1067
1068 static void cfl_whitelist_build(struct i915_wa_list *w)
1069 {
1070         gen9_whitelist_build(w);
1071 }
1072
1073 static void cnl_whitelist_build(struct i915_wa_list *w)
1074 {
1075         /* WaEnablePreemptionGranularityControlByUMD:cnl */
1076         whitelist_reg(w, GEN8_CS_CHICKEN1);
1077 }
1078
1079 static void icl_whitelist_build(struct i915_wa_list *w)
1080 {
1081         /* WaAllowUMDToModifyHalfSliceChicken7:icl */
1082         whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
1083
1084         /* WaAllowUMDToModifySamplerMode:icl */
1085         whitelist_reg(w, GEN10_SAMPLER_MODE);
1086
1087         /* WaEnableStateCacheRedirectToCS:icl */
1088         whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
1089 }
1090
1091 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
1092 {
1093         struct drm_i915_private *i915 = engine->i915;
1094         struct i915_wa_list *w = &engine->whitelist;
1095
1096         if (engine->class != RENDER_CLASS)
1097                 return;
1098
1099         wa_init_start(w, "whitelist");
1100
1101         if (IS_GEN(i915, 11))
1102                 icl_whitelist_build(w);
1103         else if (IS_CANNONLAKE(i915))
1104                 cnl_whitelist_build(w);
1105         else if (IS_COFFEELAKE(i915))
1106                 cfl_whitelist_build(w);
1107         else if (IS_GEMINILAKE(i915))
1108                 glk_whitelist_build(w);
1109         else if (IS_KABYLAKE(i915))
1110                 kbl_whitelist_build(w);
1111         else if (IS_BROXTON(i915))
1112                 bxt_whitelist_build(w);
1113         else if (IS_SKYLAKE(i915))
1114                 skl_whitelist_build(w);
1115         else if (INTEL_GEN(i915) <= 8)
1116                 return;
1117         else
1118                 MISSING_CASE(INTEL_GEN(i915));
1119
1120         wa_init_finish(w);
1121 }
1122
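/*
 * Program each whitelisted register offset into one of the engine's
 * RING_FORCE_TO_NONPRIV slots, and point the unused slots at RING_NOPID
 * so they hold a harmless default.
 */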
1123 void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
1124 {
1125         const struct i915_wa_list *wal = &engine->whitelist;
1126         struct intel_uncore *uncore = engine->uncore;
1127         const u32 base = engine->mmio_base;
1128         struct i915_wa *wa;
1129         unsigned int i;
1130
1131         if (!wal->count)
1132                 return;
1133
1134         for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1135                 intel_uncore_write(uncore,
1136                                    RING_FORCE_TO_NONPRIV(base, i),
1137                                    i915_mmio_reg_offset(wa->reg));
1138
1139         /* And clear the rest just in case of garbage */
1140         for (; i < RING_MAX_NONPRIV_SLOTS; i++)
1141                 intel_uncore_write(uncore,
1142                                    RING_FORCE_TO_NONPRIV(base, i),
1143                                    i915_mmio_reg_offset(RING_NOPID(base)));
1144 }
1145
1146 static void
1147 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1148 {
1149         struct drm_i915_private *i915 = engine->i915;
1150
1151         if (IS_GEN(i915, 11)) {
1152                 /* This is not a Wa. Enable for better image quality */
1153                 wa_masked_en(wal,
1154                              _3D_CHICKEN3,
1155                              _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
1156
1157                 /* WaPipelineFlushCoherentLines:icl */
1158                 ignore_wa_write_or(wal,
1159                                    GEN8_L3SQCREG4,
1160                                    GEN8_LQSC_FLUSH_COHERENT_LINES,
1161                                    GEN8_LQSC_FLUSH_COHERENT_LINES);
1162
1163                 /*
1164                  * Wa_1405543622:icl
1165                  * Formerly known as WaGAPZPriorityScheme
1166                  */
1167                 wa_write_or(wal,
1168                             GEN8_GARBCNTL,
1169                             GEN11_ARBITRATION_PRIO_ORDER_MASK);
1170
1171                 /*
1172                  * Wa_1604223664:icl
1173                  * Formerly known as WaL3BankAddressHashing
1174                  */
1175                 wa_write_masked_or(wal,
1176                                    GEN8_GARBCNTL,
1177                                    GEN11_HASH_CTRL_EXCL_MASK,
1178                                    GEN11_HASH_CTRL_EXCL_BIT0);
1179                 wa_write_masked_or(wal,
1180                                    GEN11_GLBLINVL,
1181                                    GEN11_BANK_HASH_ADDR_EXCL_MASK,
1182                                    GEN11_BANK_HASH_ADDR_EXCL_BIT0);
1183
1184                 /*
1185                  * Wa_1405733216:icl
1186                  * Formerly known as WaDisableCleanEvicts
1187                  */
1188                 ignore_wa_write_or(wal,
1189                                    GEN8_L3SQCREG4,
1190                                    GEN11_LQSC_CLEAN_EVICT_DISABLE,
1191                                    GEN11_LQSC_CLEAN_EVICT_DISABLE);
1192
1193                 /* WaForwardProgressSoftReset:icl */
1194                 wa_write_or(wal,
1195                             GEN10_SCRATCH_LNCF2,
1196                             PMFLUSHDONE_LNICRSDROP |
1197                             PMFLUSH_GAPL3UNBLOCK |
1198                             PMFLUSHDONE_LNEBLK);
1199
1200                 /* Wa_1406609255:icl (pre-prod) */
1201                 if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
1202                         wa_write_or(wal,
1203                                     GEN7_SARCHKMD,
1204                                     GEN7_DISABLE_DEMAND_PREFETCH |
1205                                     GEN7_DISABLE_SAMPLER_PREFETCH);
1206         }
1207
1208         if (IS_GEN_RANGE(i915, 9, 11)) {
1209                 /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl */
1210                 wa_masked_en(wal,
1211                              GEN7_FF_SLICE_CS_CHICKEN1,
1212                              GEN9_FFSC_PERCTX_PREEMPT_CTRL);
1213         }
1214
1215         if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
1216                 /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
1217                 wa_write_or(wal,
1218                             GEN8_GARBCNTL,
1219                             GEN9_GAPS_TSV_CREDIT_DISABLE);
1220         }
1221
1222         if (IS_BROXTON(i915)) {
1223                 /* WaDisablePooledEuLoadBalancingFix:bxt */
1224                 wa_masked_en(wal,
1225                              FF_SLICE_CS_CHICKEN2,
1226                              GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
1227         }
1228
1229         if (IS_GEN(i915, 9)) {
1230                 /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
1231                 wa_masked_en(wal,
1232                              GEN9_CSFE_CHICKEN1_RCS,
1233                              GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
1234
1235                 /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
1236                 wa_write_or(wal,
1237                             BDW_SCRATCH1,
1238                             GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
1239
1240                 /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
1241                 if (IS_GEN9_LP(i915))
1242                         wa_write_masked_or(wal,
1243                                            GEN8_L3SQCREG1,
1244                                            L3_PRIO_CREDITS_MASK,
1245                                            L3_GENERAL_PRIO_CREDITS(62) |
1246                                            L3_HIGH_PRIO_CREDITS(2));
1247
1248                 /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
1249                 wa_write_or(wal,
1250                             GEN8_L3SQCREG4,
1251                             GEN8_LQSC_FLUSH_COHERENT_LINES);
1252         }
1253 }
1254
1255 static void
1256 xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1257 {
1258         struct drm_i915_private *i915 = engine->i915;
1259
1260         /* WaKBLVECSSemaphoreWaitPoll:kbl */
1261         if (IS_KBL_REVID(i915, KBL_REVID_A0, KBL_REVID_E0)) {
1262                 wa_write(wal,
1263                          RING_SEMA_WAIT_POLL(engine->mmio_base),
1264                          1);
1265         }
1266 }
1267
1268 static void
1269 engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
1270 {
1271         if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
1272                 return;
1273
1274         if (engine->id == RCS0)
1275                 rcs_engine_wa_init(engine, wal);
1276         else
1277                 xcs_engine_wa_init(engine, wal);
1278 }
1279
1280 void intel_engine_init_workarounds(struct intel_engine_cs *engine)
1281 {
1282         struct i915_wa_list *wal = &engine->wa_list;
1283
1284         if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8))
1285                 return;
1286
1287         wa_init_start(wal, engine->name);
1288         engine_init_workarounds(engine, wal);
1289         wa_init_finish(wal);
1290 }
1291
1292 void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
1293 {
1294         wa_list_apply(engine->uncore, &engine->wa_list);
1295 }
1296
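/*
 * Allocate and pin an internal object large enough to hold one dword of
 * readback data per workaround; used as the destination for wa_list_srm().
 */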
1297 static struct i915_vma *
1298 create_scratch(struct i915_address_space *vm, int count)
1299 {
1300         struct drm_i915_gem_object *obj;
1301         struct i915_vma *vma;
1302         unsigned int size;
1303         int err;
1304
1305         size = round_up(count * sizeof(u32), PAGE_SIZE);
1306         obj = i915_gem_object_create_internal(vm->i915, size);
1307         if (IS_ERR(obj))
1308                 return ERR_CAST(obj);
1309
1310         i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
1311
1312         vma = i915_vma_instance(obj, vm, NULL);
1313         if (IS_ERR(vma)) {
1314                 err = PTR_ERR(vma);
1315                 goto err_obj;
1316         }
1317
1318         err = i915_vma_pin(vma, 0, 0,
1319                            i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
1320         if (err)
1321                 goto err_obj;
1322
1323         return vma;
1324
1325 err_obj:
1326         i915_gem_object_put(obj);
1327         return ERR_PTR(err);
1328 }
1329
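/*
 * Emit one MI_STORE_REGISTER_MEM per workaround so the GPU itself samples
 * each register into the scratch buffer (the command length is bumped on
 * gen8+ for the 64-bit address).
 */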
1330 static int
1331 wa_list_srm(struct i915_request *rq,
1332             const struct i915_wa_list *wal,
1333             struct i915_vma *vma)
1334 {
1335         const struct i915_wa *wa;
1336         unsigned int i;
1337         u32 srm, *cs;
1338
1339         srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
1340         if (INTEL_GEN(rq->i915) >= 8)
1341                 srm++;
1342
1343         cs = intel_ring_begin(rq, 4 * wal->count);
1344         if (IS_ERR(cs))
1345                 return PTR_ERR(cs);
1346
1347         for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
1348                 *cs++ = srm;
1349                 *cs++ = i915_mmio_reg_offset(wa->reg);
1350                 *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
1351                 *cs++ = 0;
1352         }
1353         intel_ring_advance(rq, cs);
1354
1355         return 0;
1356 }
1357
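/*
 * Verify the workarounds as seen from within @ce's context: have the GPU
 * dump every register in the list to scratch memory, then compare each
 * result against the expected value with wa_verify().
 */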
1358 static int engine_wa_list_verify(struct intel_context *ce,
1359                                  const struct i915_wa_list * const wal,
1360                                  const char *from)
1361 {
1362         const struct i915_wa *wa;
1363         struct i915_request *rq;
1364         struct i915_vma *vma;
1365         unsigned int i;
1366         u32 *results;
1367         int err;
1368
1369         if (!wal->count)
1370                 return 0;
1371
1372         vma = create_scratch(&ce->engine->i915->ggtt.vm, wal->count);
1373         if (IS_ERR(vma))
1374                 return PTR_ERR(vma);
1375
1376         rq = intel_context_create_request(ce);
1377         if (IS_ERR(rq)) {
1378                 err = PTR_ERR(rq);
1379                 goto err_vma;
1380         }
1381
1382         err = wa_list_srm(rq, wal, vma);
1383         if (err)
1384                 goto err_vma;
1385
1386         i915_request_add(rq);
1387         if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
1388                 err = -ETIME;
1389                 goto err_vma;
1390         }
1391
1392         results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
1393         if (IS_ERR(results)) {
1394                 err = PTR_ERR(results);
1395                 goto err_vma;
1396         }
1397
1398         err = 0;
1399         for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
1400                 if (!wa_verify(wa, results[i], wal->name, from))
1401                         err = -ENXIO;
1402
1403         i915_gem_object_unpin_map(vma->obj);
1404
1405 err_vma:
1406         i915_vma_unpin(vma);
1407         i915_vma_put(vma);
1408         return err;
1409 }
1410
1411 int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
1412                                     const char *from)
1413 {
1414         return engine_wa_list_verify(engine->kernel_context,
1415                                      &engine->wa_list,
1416                                      from);
1417 }
1418
1419 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
1420 #include "selftest_workarounds.c"
1421 #endif