5eec4ce965a599f05d0679c3188c8a095383b7e1
[sfrench/cifs-2.6.git] / drivers / gpu / drm / i915 / intel_workarounds.c
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2014-2018 Intel Corporation
5  */
6
7 #include "i915_drv.h"
8 #include "intel_workarounds.h"
9
10 /**
11  * DOC: Hardware workarounds
12  *
13  * This file is intended as a central place to implement most [1]_ of the
14  * required workarounds for hardware to work as originally intended. They fall
15  * in five basic categories depending on how/when they are applied:
16  *
17  * - Workarounds that touch registers that are saved/restored to/from the HW
18  *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
20  * - GT workarounds. The list of these WAs is applied whenever these registers
21  *   revert to default values (on GPU reset, suspend/resume [2]_, etc..).
22  * - Display workarounds. The list is applied during display clock-gating
23  *   initialization.
24  * - Workarounds that whitelist a privileged register, so that UMDs can manage
 * them directly. This is just a special case of an MMIO workaround (as we
26  *   write the list of these to/be-whitelisted registers to some special HW
27  *   registers).
28  * - Workaround batchbuffers, that get executed automatically by the hardware
29  *   on every HW context restore.
30  *
31  * .. [1] Please notice that there are other WAs that, due to their nature,
32  *    cannot be applied from a central place. Those are peppered around the rest
33  *    of the code, as needed.
34  *
35  * .. [2] Technically, some registers are powercontext saved & restored, so they
36  *    survive a suspend/resume. In practice, writing them again is not too
37  *    costly and simplifies things. We can revisit this in the future.
38  *
39  * Layout
40  * ''''''
41  *
42  * Keep things in this file ordered by WA type, as per the above (context, GT,
43  * display, register whitelist, batchbuffer). Then, inside each type, keep the
44  * following order:
45  *
46  * - Infrastructure functions and macros
47  * - WAs per platform in standard gen/chrono order
48  * - Public functions to init or apply the given workaround type.
49  */
50
51 static int wa_add(struct drm_i915_private *dev_priv,
52                   i915_reg_t addr,
53                   const u32 mask, const u32 val)
54 {
55         const unsigned int idx = dev_priv->workarounds.count;
56
57         if (WARN_ON(idx >= I915_MAX_WA_REGS))
58                 return -ENOSPC;
59
60         dev_priv->workarounds.reg[idx].addr = addr;
61         dev_priv->workarounds.reg[idx].value = val;
62         dev_priv->workarounds.reg[idx].mask = mask;
63
64         dev_priv->workarounds.count++;
65
66         return 0;
67 }
68
/*
 * WA_REG - record one register workaround via wa_add(), and return from
 * the *enclosing* function on failure (list full). Relies on a dev_priv
 * variable being in scope at the call site.
 */
#define WA_REG(addr, mask, val) do { \
		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
		if (r) \
			return r; \
	} while (0)

/* Set @mask bits, encoded in the masked-write format (_MASKED_BIT_ENABLE). */
#define WA_SET_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))

/* Clear @mask bits, encoded in the masked-write format (_MASKED_BIT_DISABLE). */
#define WA_CLR_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))

/* Program a multi-bit field covered by @mask to @value (masked-write format). */
#define WA_SET_FIELD_MASKED(addr, mask, value) \
	WA_REG(addr, (mask), _MASKED_FIELD(mask, value))
83
/*
 * Context workarounds common to all gen8 platforms (BDW, CHV). Entries are
 * recorded in dev_priv->workarounds and later emitted into each new context
 * via MI_LOAD_REGISTER_IMM (see intel_ctx_workarounds_emit()).
 */
static int gen8_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

	/* WaDisableAsyncFlipPerfMode:bdw,chv */
	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

	/* WaDisablePartialInstShootdown:bdw,chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:bdw,chv */
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
			  HDC_FORCE_NON_COHERENT);

	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
	 *  polygons in the same 8x4 pixel/sample area to be processed without
	 *  stalling waiting for the earlier ones to write to Hierarchical Z
	 *  buffer."
	 *
	 * This optimization is off by default for BDW and CHV; turn it on.
	 */
	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

	/* Wa4x4STCOptimizationDisable:bdw,chv */
	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);

	return 0;
}
132
/* Broadwell context workarounds: the common gen8 set plus BDW-only entries. */
static int bdw_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	int ret;

	ret = gen8_ctx_workarounds_init(dev_priv);
	if (ret)
		return ret;

	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* WaDisableDopClockGating:bdw
	 *
	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
	 * to disable EUTC clock gating.
	 */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
			  DOP_CLOCK_GATING_DISABLE);

	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
			  GEN8_SAMPLER_POWER_BYPASS_DIS);

	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  /* WaForceContextSaveRestoreNonCoherent:bdw */
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
			  (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0));

	return 0;
}
163
164 static int chv_ctx_workarounds_init(struct drm_i915_private *dev_priv)
165 {
166         int ret;
167
168         ret = gen8_ctx_workarounds_init(dev_priv);
169         if (ret)
170                 return ret;
171
172         /* WaDisableThreadStallDopClockGating:chv */
173         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
174
175         /* Improve HiZ throughput on CHV. */
176         WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
177
178         return 0;
179 }
180
/*
 * Context workarounds common to all gen9 platforms (SKL, BXT, KBL, GLK, CFL);
 * the per-platform init functions below add their own entries on top.
 */
static int gen9_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	if (HAS_LLC(dev_priv)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN9_PBE_COMPRESSED_HASH_SELECTION);
		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
				  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
	}

	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  FLOW_CONTROL_ENABLE |
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Syncing dependencies between camera and graphics:skl,bxt,kbl */
	if (!IS_COFFEELAKE(dev_priv))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);

	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
			  GEN9_ENABLE_YV12_BUGFIX |
			  GEN9_ENABLE_GPGPU_PREEMPTION);

	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(CACHE_MODE_1,
			  GEN8_4x4_STC_OPTIMIZATION_DISABLE |
			  GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
			  GEN9_CCS_TLB_PREFETCH_ENABLE);

	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
	 * both tied to WaForceContextSaveRestoreNonCoherent
	 * in some hsds for skl. We keep the tie for all gen9. The
	 * documentation is a bit hazy and so we want to get common behaviour,
	 * even though there is no clear evidence we would need both on kbl/bxt.
	 * This area has been source of system hangs so we play it safe
	 * and mimic the skl regardless of what bspec says.
	 *
	 * Use Force Non-Coherent whenever executing a 3D context. This
	 * is a workaround for a possible hang in the unlikely event
	 * a TLB invalidation occurs during a PSD flush.
	 */

	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_NON_COHERENT);

	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
	if (IS_SKYLAKE(dev_priv) ||
	    IS_KABYLAKE(dev_priv) ||
	    IS_COFFEELAKE(dev_priv))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN8_SAMPLER_POWER_BYPASS_DIS);

	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

	/*
	 * Supporting preemption with fine-granularity requires changes in the
	 * batch buffer programming. Since we can't break old userspace, we
	 * need to set our default preemption level to safe value. Userspace is
	 * still able to use more fine-grained preemption levels, since in
	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
	 * not real HW workarounds, but merely a way to start using preemption
	 * while maintaining old contract with userspace.
	 */

	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	return 0;
}
275
276 static int skl_tune_iz_hashing(struct drm_i915_private *dev_priv)
277 {
278         u8 vals[3] = { 0, 0, 0 };
279         unsigned int i;
280
281         for (i = 0; i < 3; i++) {
282                 u8 ss;
283
284                 /*
285                  * Only consider slices where one, and only one, subslice has 7
286                  * EUs
287                  */
288                 if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]))
289                         continue;
290
291                 /*
292                  * subslice_7eu[i] != 0 (because of the check above) and
293                  * ss_max == 4 (maximum number of subslices possible per slice)
294                  *
295                  * ->    0 <= ss <= 3;
296                  */
297                 ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1;
298                 vals[i] = 3 - ss;
299         }
300
301         if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
302                 return 0;
303
304         /* Tune IZ hashing. See intel_device_info_runtime_init() */
305         WA_SET_FIELD_MASKED(GEN7_GT_MODE,
306                             GEN9_IZ_HASHING_MASK(2) |
307                             GEN9_IZ_HASHING_MASK(1) |
308                             GEN9_IZ_HASHING_MASK(0),
309                             GEN9_IZ_HASHING(2, vals[2]) |
310                             GEN9_IZ_HASHING(1, vals[1]) |
311                             GEN9_IZ_HASHING(0, vals[0]));
312
313         return 0;
314 }
315
/* Skylake context workarounds: the common gen9 set plus IZ hash tuning. */
static int skl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	const int err = gen9_ctx_workarounds_init(dev_priv);

	if (err)
		return err;

	return skl_tune_iz_hashing(dev_priv);
}
326
/* Broxton context workarounds: the common gen9 set plus BXT-only entries. */
static int bxt_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	int ret;

	ret = gen9_ctx_workarounds_init(dev_priv);
	if (ret)
		return ret;

	/* WaDisableThreadStallDopClockGating:bxt */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  STALL_DOP_GATING_DISABLE);

	/* WaToEnableHwFixForPushConstHWBug:bxt */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	return 0;
}
345
/*
 * Kaby Lake context workarounds: the common gen9 set plus KBL-only entries,
 * some of which are gated on the hardware revision.
 */
static int kbl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	int ret;

	ret = gen9_ctx_workarounds_init(dev_priv);
	if (ret)
		return ret;

	/* WaDisableFenceDestinationToSLM:kbl (pre-prod) */
	if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0))
		WA_SET_BIT_MASKED(HDC_CHICKEN0,
				  HDC_FENCE_DEST_SLM_DISABLE);

	/* WaToEnableHwFixForPushConstHWBug:kbl */
	if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:kbl */
	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);

	return 0;
}
370
/* Geminilake context workarounds: the common gen9 set plus GLK-only entries. */
static int glk_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	int ret;

	ret = gen9_ctx_workarounds_init(dev_priv);
	if (ret)
		return ret;

	/* WaToEnableHwFixForPushConstHWBug:glk */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	return 0;
}
385
/* Coffee Lake context workarounds: the common gen9 set plus CFL-only entries. */
static int cfl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	int ret;

	ret = gen9_ctx_workarounds_init(dev_priv);
	if (ret)
		return ret;

	/* WaToEnableHwFixForPushConstHWBug:cfl */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:cfl */
	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);

	return 0;
}
404
/*
 * Cannon Lake context workarounds. Note CNL does not build on the gen9
 * common set; all its entries are listed here.
 */
static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	/* WaForceContextSaveRestoreNonCoherent:cnl */
	WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);

	/* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
		WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);

	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
	if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);

	/* WaPushConstantDereferenceHoldDisable:cnl */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);

	/* FtrEnableFastAnisoL1BankingFix:cnl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);

	/* WaDisable3DMidCmdPreemption:cnl */
	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:cnl */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaDisableEarlyEOT:cnl */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);

	return 0;
}
443
/* Ice Lake context workarounds. */
static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	/* Wa_1604370585:icl (pre-prod)
	 * Formerly known as WaPushConstantDereferenceHoldDisable
	 */
	if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0))
		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
				  PUSH_CONSTANT_DEREF_DISABLE);

	/* WaForceEnableNonCoherent:icl
	 * This is not the same workaround as in early Gen9 platforms, where
	 * lacking this could cause system hangs, but coherency performance
	 * overhead is high and only a few compute workloads really need it
	 * (the register is whitelisted in hardware now, so UMDs can opt in
	 * for coherency if they have a good reason).
	 */
	WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

	return 0;
}
464
/**
 * intel_ctx_workarounds_init - gather the context workaround list
 * @dev_priv: i915 device
 *
 * Resets and (re)populates dev_priv->workarounds with the context
 * workarounds for the running platform. Platforms before gen8 record no
 * entries here. Returns 0 on success, or a negative error code from the
 * platform init function (e.g. -ENOSPC when the fixed-size list overflows).
 */
int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv)
{
	int err = 0;

	dev_priv->workarounds.count = 0;

	if (INTEL_GEN(dev_priv) < 8)
		err = 0;
	else if (IS_BROADWELL(dev_priv))
		err = bdw_ctx_workarounds_init(dev_priv);
	else if (IS_CHERRYVIEW(dev_priv))
		err = chv_ctx_workarounds_init(dev_priv);
	else if (IS_SKYLAKE(dev_priv))
		err = skl_ctx_workarounds_init(dev_priv);
	else if (IS_BROXTON(dev_priv))
		err = bxt_ctx_workarounds_init(dev_priv);
	else if (IS_KABYLAKE(dev_priv))
		err = kbl_ctx_workarounds_init(dev_priv);
	else if (IS_GEMINILAKE(dev_priv))
		err = glk_ctx_workarounds_init(dev_priv);
	else if (IS_COFFEELAKE(dev_priv))
		err = cfl_ctx_workarounds_init(dev_priv);
	else if (IS_CANNONLAKE(dev_priv))
		err = cnl_ctx_workarounds_init(dev_priv);
	else if (IS_ICELAKE(dev_priv))
		err = icl_ctx_workarounds_init(dev_priv);
	else
		MISSING_CASE(INTEL_GEN(dev_priv));
	if (err)
		return err;

	DRM_DEBUG_DRIVER("Number of context specific w/a: %d\n",
			 dev_priv->workarounds.count);
	return 0;
}
500
/**
 * intel_ctx_workarounds_emit - emit the context workaround list into a request
 * @rq: request whose ring receives the commands
 *
 * Emits one MI_LOAD_REGISTER_IMM packet programming every register recorded
 * in dev_priv->workarounds, bracketed by barrier flushes. A no-op when the
 * list is empty. Returns 0 on success or a negative error code.
 */
int intel_ctx_workarounds_emit(struct i915_request *rq)
{
	struct i915_workarounds *w = &rq->i915->workarounds;
	u32 *cs;
	int ret, i;

	if (w->count == 0)
		return 0;

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	/* 1 dword LRI header + one (offset, value) pair per entry + 1 NOOP */
	cs = intel_ring_begin(rq, (w->count * 2 + 2));
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(w->count);
	for (i = 0; i < w->count; i++) {
		*cs++ = i915_mmio_reg_offset(w->reg[i].addr);
		*cs++ = w->reg[i].value;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	/* Second barrier flush, matching the one emitted before the LRI. */
	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	return 0;
}
533
static void bdw_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	/* Intentionally empty: no GT workarounds are applied here for BDW. */
}
537
static void chv_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	/* Intentionally empty: no GT workarounds are applied here for CHV. */
}
541
/*
 * GT workarounds common to all gen9 platforms, written directly via MMIO.
 * Per the DOC comment at the top of the file, these are re-applied whenever
 * the registers revert to their default values (GPU reset, suspend/resume).
 */
static void gen9_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
	I915_WRITE(GEN9_CSFE_CHICKEN1_RCS,
		   _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE));

	/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);

	/* WaDisableKillLogic:bxt,skl,kbl */
	if (!IS_COFFEELAKE(dev_priv))
		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
			   ECOCHK_DIS_TLB);

	if (HAS_LLC(dev_priv)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		I915_WRITE(MMCD_MISC_CTRL,
			   I915_READ(MMCD_MISC_CTRL) |
			   MMCD_PCLA |
			   MMCD_HOTSPOT_EN);
	}

	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
		   BDW_DISABLE_HDC_INVALIDATION);

	/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
	if (IS_GEN9_LP(dev_priv)) {
		u32 val = I915_READ(GEN8_L3SQCREG1);

		val &= ~L3_PRIO_CREDITS_MASK;
		val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2);
		I915_WRITE(GEN8_L3SQCREG1, val);
	}

	/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
	I915_WRITE(GEN8_L3SQCREG4,
		   I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_FLUSH_COHERENT_LINES);

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
}
590
/* Skylake GT workarounds: the common gen9 set plus SKL-only MMIO writes. */
static void skl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	gen9_gt_workarounds_apply(dev_priv);

	/* WaEnableGapsTsvCreditFix:skl */
	I915_WRITE(GEN8_GARBCNTL,
		   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);

	/* WaDisableGafsUnitClkGating:skl */
	I915_WRITE(GEN7_UCGCTL4,
		   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:skl */
	if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
			   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}
609
/* Broxton GT workarounds: the common gen9 set plus BXT-only MMIO writes. */
static void bxt_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	gen9_gt_workarounds_apply(dev_priv);

	/* WaDisablePooledEuLoadBalancingFix:bxt */
	I915_WRITE(FF_SLICE_CS_CHICKEN2,
		   _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE));

	/* WaInPlaceDecompressionHang:bxt */
	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}
623
/* Kaby Lake GT workarounds: the common gen9 set plus KBL-only MMIO writes. */
static void kbl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	gen9_gt_workarounds_apply(dev_priv);

	/* WaEnableGapsTsvCreditFix:kbl */
	I915_WRITE(GEN8_GARBCNTL,
		   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);

	/* WaDisableDynamicCreditSharing:kbl */
	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
		I915_WRITE(GAMT_CHKN_BIT_REG,
			   I915_READ(GAMT_CHKN_BIT_REG) |
			   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

	/* WaDisableGafsUnitClkGating:kbl */
	I915_WRITE(GEN7_UCGCTL4,
		   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:kbl */
	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}
647
/* Geminilake GT workarounds: only the common gen9 set applies. */
static void glk_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	gen9_gt_workarounds_apply(dev_priv);
}
652
/* Coffee Lake GT workarounds: the common gen9 set plus CFL-only MMIO writes. */
static void cfl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	gen9_gt_workarounds_apply(dev_priv);

	/* WaEnableGapsTsvCreditFix:cfl */
	I915_WRITE(GEN8_GARBCNTL,
		   I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE);

	/* WaDisableGafsUnitClkGating:cfl */
	I915_WRITE(GEN7_UCGCTL4,
		   I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:cfl */
	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}
670
/* Cannon Lake GT workarounds (does not reuse the gen9 common set). */
static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
	if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0))
		I915_WRITE(GAMT_CHKN_BIT_REG,
			   I915_READ(GAMT_CHKN_BIT_REG) |
			   GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);

	/* WaInPlaceDecompressionHang:cnl */
	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
		   I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaEnablePreemptionGranularityControlByUMD:cnl */
	I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
		   _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
}
688
/* Ice Lake GT workarounds, applied directly via MMIO. */
static void icl_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	/* This is not a W/A. Enable it for better image quality. */
	I915_WRITE(_3D_CHICKEN3,
		   _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE));

	/* WaInPlaceDecompressionHang:icl */
	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
					    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaPipelineFlushCoherentLines:icl */
	I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
				   GEN8_LQSC_FLUSH_COHERENT_LINES);

	/* Wa_1405543622:icl
	 * Formerly known as WaGAPZPriorityScheme
	 */
	I915_WRITE(GEN8_GARBCNTL, I915_READ(GEN8_GARBCNTL) |
				  GEN11_ARBITRATION_PRIO_ORDER_MASK);

	/* Wa_1604223664:icl
	 * Formerly known as WaL3BankAddressHashing
	 */
	I915_WRITE(GEN8_GARBCNTL,
		   (I915_READ(GEN8_GARBCNTL) & ~GEN11_HASH_CTRL_EXCL_MASK) |
		   GEN11_HASH_CTRL_EXCL_BIT0);
	I915_WRITE(GEN11_GLBLINVL,
		   (I915_READ(GEN11_GLBLINVL) & ~GEN11_BANK_HASH_ADDR_EXCL_MASK) |
		   GEN11_BANK_HASH_ADDR_EXCL_BIT0);

	/* WaModifyGamTlbPartitioning:icl */
	I915_WRITE(GEN11_GACB_PERF_CTRL,
		   (I915_READ(GEN11_GACB_PERF_CTRL) & ~GEN11_HASH_CTRL_MASK) |
		   GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

	/* Wa_1405733216:icl
	 * Formerly known as WaDisableCleanEvicts
	 */
	I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
				   GEN11_LQSC_CLEAN_EVICT_DISABLE);

	/* Wa_1405766107:icl
	 * Formerly known as WaCL2SFHalfMaxAlloc
	 */
	I915_WRITE(GEN11_LSN_UNSLCVC, I915_READ(GEN11_LSN_UNSLCVC) |
				      GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
				      GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

	/* Wa_220166154:icl
	 * Formerly known as WaDisCtxReload
	 */
	I915_WRITE(GAMW_ECO_DEV_RW_IA_REG, I915_READ(GAMW_ECO_DEV_RW_IA_REG) |
					   GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

	/* Wa_1405779004:icl (pre-prod) */
	if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0))
		I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE,
			   I915_READ(SLICE_UNIT_LEVEL_CLKGATE) |
			   MSCUNIT_CLKGATE_DIS);

	/* Wa_1406680159:icl */
	I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE,
		   I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE) |
		   GWUNIT_CLKGATE_DIS);

	/* Wa_1604302699:icl */
	I915_WRITE(GEN10_L3_CHICKEN_MODE_REGISTER,
		   I915_READ(GEN10_L3_CHICKEN_MODE_REGISTER) |
		   GEN11_I2M_WRITE_DISABLE);

	/* Wa_1406838659:icl (pre-prod) */
	if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0))
		I915_WRITE(INF_UNIT_LEVEL_CLKGATE,
			   I915_READ(INF_UNIT_LEVEL_CLKGATE) |
			   CGPSF_CLKGATE_DIS);

	/* WaForwardProgressSoftReset:icl */
	I915_WRITE(GEN10_SCRATCH_LNCF2,
		   I915_READ(GEN10_SCRATCH_LNCF2) |
		   PMFLUSHDONE_LNICRSDROP |
		   PMFLUSH_GAPL3UNBLOCK |
		   PMFLUSHDONE_LNEBLK);
}
772
/**
 * intel_gt_workarounds_apply - write the GT workarounds for this platform
 * @dev_priv: i915 device
 *
 * Applies the platform's GT workarounds via direct MMIO writes. Platforms
 * before gen8 are handled elsewhere and return early here.
 */
void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv)
{
	if (INTEL_GEN(dev_priv) < 8)
		return;
	else if (IS_BROADWELL(dev_priv))
		bdw_gt_workarounds_apply(dev_priv);
	else if (IS_CHERRYVIEW(dev_priv))
		chv_gt_workarounds_apply(dev_priv);
	else if (IS_SKYLAKE(dev_priv))
		skl_gt_workarounds_apply(dev_priv);
	else if (IS_BROXTON(dev_priv))
		bxt_gt_workarounds_apply(dev_priv);
	else if (IS_KABYLAKE(dev_priv))
		kbl_gt_workarounds_apply(dev_priv);
	else if (IS_GEMINILAKE(dev_priv))
		glk_gt_workarounds_apply(dev_priv);
	else if (IS_COFFEELAKE(dev_priv))
		cfl_gt_workarounds_apply(dev_priv);
	else if (IS_CANNONLAKE(dev_priv))
		cnl_gt_workarounds_apply(dev_priv);
	else if (IS_ICELAKE(dev_priv))
		icl_gt_workarounds_apply(dev_priv);
	else
		MISSING_CASE(INTEL_GEN(dev_priv));
}
798
/*
 * Scratch description of the registers to be whitelisted for userspace
 * access on an engine. Built by whitelist_build() and programmed into the
 * HW by whitelist_apply(); it only lives on the stack for the duration of
 * intel_whitelist_workarounds_apply().
 */
struct whitelist {
	/* Registers to expose, one per RING_FORCE_TO_NONPRIV slot */
	i915_reg_t reg[RING_MAX_NONPRIV_SLOTS];
	/* Number of valid entries in reg[] */
	unsigned int count;
	/* Offset of RING_NOPID, used to scrub the unused slots */
	u32 nopid;
};
804
805 static void whitelist_reg(struct whitelist *w, i915_reg_t reg)
806 {
807         if (GEM_WARN_ON(w->count >= RING_MAX_NONPRIV_SLOTS))
808                 return;
809
810         w->reg[w->count++] = reg;
811 }
812
/* Broadwell: no registers are currently added to the NONPRIV whitelist. */
static void bdw_whitelist_build(struct whitelist *w)
{
}
816
/* Cherryview: no registers are currently added to the NONPRIV whitelist. */
static void chv_whitelist_build(struct whitelist *w)
{
}
820
/*
 * Registers whitelisted on all gen9-based platforms.
 *
 * NB: whitelist_reg() appends, so the order of the calls below fixes
 * which RING_FORCE_TO_NONPRIV slot each register lands in.
 */
static void gen9_whitelist_build(struct whitelist *w)
{
	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
	whitelist_reg(w, GEN8_CS_CHICKEN1);

	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
	whitelist_reg(w, GEN8_HDC_CHICKEN1);
}
832
/* Skylake: the common gen9 list plus one SKL-specific entry. */
static void skl_whitelist_build(struct whitelist *w)
{
	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:skl */
	whitelist_reg(w, GEN8_L3SQCREG4);
}
840
/* Broxton: only the common gen9 list, no extra entries. */
static void bxt_whitelist_build(struct whitelist *w)
{
	gen9_whitelist_build(w);
}
845
/* Kabylake: the common gen9 list plus one KBL-specific entry. */
static void kbl_whitelist_build(struct whitelist *w)
{
	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:kbl */
	whitelist_reg(w, GEN8_L3SQCREG4);
}
853
/* Geminilake: the common gen9 list plus one GLK-specific entry. */
static void glk_whitelist_build(struct whitelist *w)
{
	gen9_whitelist_build(w);

	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}
861
/* Coffeelake: only the common gen9 list, no extra entries. */
static void cfl_whitelist_build(struct whitelist *w)
{
	gen9_whitelist_build(w);
}
866
/* Cannonlake does not inherit the gen9 list; it exposes a single register. */
static void cnl_whitelist_build(struct whitelist *w)
{
	/* WaEnablePreemptionGranularityControlByUMD:cnl */
	whitelist_reg(w, GEN8_CS_CHICKEN1);
}
872
/* Icelake: no whitelist entries yet (placeholder). */
static void icl_whitelist_build(struct whitelist *w)
{
}
876
877 static struct whitelist *whitelist_build(struct intel_engine_cs *engine,
878                                          struct whitelist *w)
879 {
880         struct drm_i915_private *i915 = engine->i915;
881
882         GEM_BUG_ON(engine->id != RCS);
883
884         w->count = 0;
885         w->nopid = i915_mmio_reg_offset(RING_NOPID(engine->mmio_base));
886
887         if (INTEL_GEN(i915) < 8)
888                 return NULL;
889         else if (IS_BROADWELL(i915))
890                 bdw_whitelist_build(w);
891         else if (IS_CHERRYVIEW(i915))
892                 chv_whitelist_build(w);
893         else if (IS_SKYLAKE(i915))
894                 skl_whitelist_build(w);
895         else if (IS_BROXTON(i915))
896                 bxt_whitelist_build(w);
897         else if (IS_KABYLAKE(i915))
898                 kbl_whitelist_build(w);
899         else if (IS_GEMINILAKE(i915))
900                 glk_whitelist_build(w);
901         else if (IS_COFFEELAKE(i915))
902                 cfl_whitelist_build(w);
903         else if (IS_CANNONLAKE(i915))
904                 cnl_whitelist_build(w);
905         else if (IS_ICELAKE(i915))
906                 icl_whitelist_build(w);
907         else
908                 MISSING_CASE(INTEL_GEN(i915));
909
910         return w;
911 }
912
913 static void whitelist_apply(struct intel_engine_cs *engine,
914                             const struct whitelist *w)
915 {
916         struct drm_i915_private *dev_priv = engine->i915;
917         const u32 base = engine->mmio_base;
918         unsigned int i;
919
920         if (!w)
921                 return;
922
923         intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
924
925         for (i = 0; i < w->count; i++)
926                 I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i),
927                               i915_mmio_reg_offset(w->reg[i]));
928
929         /* And clear the rest just in case of garbage */
930         for (; i < RING_MAX_NONPRIV_SLOTS; i++)
931                 I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i), w->nopid);
932
933         intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
934 }
935
936 void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine)
937 {
938         struct whitelist w;
939
940         whitelist_apply(engine, whitelist_build(engine, &w));
941 }
942
943 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
944 #include "selftests/intel_workarounds.c"
945 #endif