drm/i915/ats-m: Add thread execution tuning setting
author: Matt Roper <matthew.d.roper@intel.com>
Fri, 26 Aug 2022 21:27:18 +0000 (14:27 -0700)
committer: Matt Roper <matthew.d.roper@intel.com>
Tue, 30 Aug 2022 19:36:37 +0000 (12:36 -0700)
On client DG2 platforms, optimal performance is achieved with the
hardware's default "age based" thread execution setting.  However on
ATS-M, switching this to "round robin after dependencies" provides
better performance.  We'll add a new "tuning" feature flag to the ATS-M
device info to enable/disable this setting.

Bspec: 68331
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Reviewed-by: Matt Atwood <matthew.s.atwood@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220826212718.409948-1-matthew.d.roper@intel.com
drivers/gpu/drm/i915/gt/intel_gt_regs.h
drivers/gpu/drm/i915/gt/intel_workarounds.c
drivers/gpu/drm/i915/i915_pci.c
drivers/gpu/drm/i915/intel_device_info.h

index 94f9ddcfb3a551638a96f152a64e905b31175e07..d414785003ccc42f9cb2dbeb3bbfcd765c7f0e74 100644 (file)
 #define   GEN12_DISABLE_TDL_PUSH               REG_BIT(9)
 #define   GEN11_DIS_PICK_2ND_EU                        REG_BIT(7)
 #define   GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX      REG_BIT(4)
+#define   THREAD_EX_ARB_MODE                   REG_GENMASK(3, 2)
+#define   THREAD_EX_ARB_MODE_RR_AFTER_DEP      REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2)
 
 #define HSW_ROW_CHICKEN3                       _MMIO(0xe49c)
 #define   HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE   (1 << 6)
index 69a0c6a74474797a863e395450a0cc382a380ee5..6d2003d598e6ab456e6125212c1cd8d9631074da 100644 (file)
@@ -2700,6 +2700,15 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915,
                       0 /* write-only, so skip validation */,
                       true);
        }
+
+       /*
+        * This tuning setting proves beneficial only on ATS-M designs; the
+        * default "age based" setting is optimal on regular DG2 and other
+        * platforms.
+        */
+       if (INTEL_INFO(i915)->tuning_thread_rr_after_dep)
+               wa_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
+                                   THREAD_EX_ARB_MODE_RR_AFTER_DEP);
 }
 
 /*
index 3e3e95c7a63f10e20ab13febe3cc30b5eea42069..7b0384373e99cd653ac7022117a416a232182f06 100644 (file)
@@ -1083,6 +1083,7 @@ static const struct intel_device_info ats_m_info = {
        DG2_FEATURES,
        .display = { 0 },
        .require_force_probe = 1,
+       .tuning_thread_rr_after_dep = 1,
 };
 
 #define XE_HPC_FEATURES \
index 677fb68f172680880942948d5d17130cb26b311e..9cd912c2703cd9dfb30c383117eee20c2d1f727d 100644 (file)
@@ -172,6 +172,7 @@ enum intel_ppgtt_type {
        func(has_runtime_pm); \
        func(has_snoop); \
        func(has_coherent_ggtt); \
+       func(tuning_thread_rr_after_dep); \
        func(unfenced_needs_alignment); \
        func(hws_needs_physical);