2 * Copyright 2014 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
23 #include <linux/kernel.h>
24 #include <linux/firmware.h>
27 #include "amdgpu_gfx.h"
29 #include "vi_structs.h"
31 #include "amdgpu_ucode.h"
32 #include "amdgpu_atombios.h"
33 #include "atombios_i2c.h"
34 #include "clearstate_vi.h"
36 #include "gmc/gmc_8_2_d.h"
37 #include "gmc/gmc_8_2_sh_mask.h"
39 #include "oss/oss_3_0_d.h"
40 #include "oss/oss_3_0_sh_mask.h"
42 #include "bif/bif_5_0_d.h"
43 #include "bif/bif_5_0_sh_mask.h"
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
52 #include "smu/smu_7_1_3_d.h"
/* GFX8 uses a single GFX ring; each MEC pipe gets a 2KB HPD (hardware
 * persistent data) area.
 */
54 #define GFX8_NUM_GFX_RINGS 1
55 #define GFX8_MEC_HPD_SIZE 2048
/* Per-ASIC golden values programmed into mmGB_ADDR_CONFIG. */
57 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
59 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
60 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
/* Helpers that shift a field value into place for the GB_TILE_MODE0 /
 * GB_MACROTILE_MODE0 register layouts (shift constants come from
 * gca/gfx_8_0_sh_mask.h).
 */
62 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
63 #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
64 #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
65 #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
66 #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
67 #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
68 #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
69 #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
70 #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
/* Per-feature override bits in mmRLC_CGTT_MGCG_OVERRIDE (local definitions;
 * NOTE(review): these look like they mirror values from an sh_mask header —
 * confirm they are not duplicated elsewhere).
 */
72 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
73 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
74 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
76 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
77 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L
/* Commands for setting/clearing bits via the BPM (bit-per-mask) serdes. */
80 #define SET_BPM_SERDES_CMD 1
81 #define CLE_BPM_SERDES_CMD 0
83 /* BPM Register Address*/
85 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */
86 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */
87 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */
88 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */
89 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */
/* Length (in dwords) of the RLC direct-register-list format. */
93 #define RLC_FormatDirectRegListLength 14
/* Firmware images this IP block may request, declared so that tools such as
 * initramfs generators can discover them.  Per ASIC: CE, PFP, ME, MEC
 * (optionally MEC2) microcode plus RLC firmware.  The Polaris parts also
 * list "_2" variants (NOTE(review): presumably newer-revision firmware
 * selected at load time — confirm against the firmware-loading code).
 */
95 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
133 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
134 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
135 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
137 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
141 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
142 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
143 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
144 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
145 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
146 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
147 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
148 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
149 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
150 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
152 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
153 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
154 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
155 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
156 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
157 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
/* Per-VMID GDS register offsets, indexed by VMID 0..15.
 * Each entry is {GDS base, GDS size, GWS, OA} (field order follows the
 * mmGDS_VMID*_BASE/SIZE, mmGDS_GWS_VMID*, mmGDS_OA_VMID* registers used).
 */
164 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
166 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
167 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
168 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
169 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
170 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
171 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
172 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
173 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
174 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
175 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
176 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
177 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
178 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
179 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
180 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
181 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
/* Tonga A11 golden settings.  Three dwords per entry — register offset,
 * mask, value — applied by amdgpu_program_register_sequence() in
 * gfx_v8_0_init_golden_registers().
 */
184 static const u32 golden_settings_tonga_a11[] =
186 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
187 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
188 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
189 mmGB_GPU_ID, 0x0000000f, 0x00000000,
190 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
191 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
192 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
193 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
194 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
195 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
196 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
197 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
198 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
199 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
200 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
201 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/* Tonga common golden registers ({reg, mask, value} triples): raster
 * config, GB_ADDR_CONFIG (0x22011003 == TONGA_GB_ADDR_CONFIG_GOLDEN) and
 * SPI CU resource reservation.
 */
204 static const u32 tonga_golden_common_all[] =
206 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
207 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
208 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
209 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
210 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
211 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
212 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
213 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
/* Tonga MGCG/CGCG clock-gating init sequence ({reg, mask, value} triples).
 * Overrides MGCG, selects broadcast GRBM_GFX_INDEX (0xe0000000), programs
 * the CGTT per-block clock controls, then the per-CU CGTS controls.
 */
216 static const u32 tonga_mgcg_cgcg_init[] =
218 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
219 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
220 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
221 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
222 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
223 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
224 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
225 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
226 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
227 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
228 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
229 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
230 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
231 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
232 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
233 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
234 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
235 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
236 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
237 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
238 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
239 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
240 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
241 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
242 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
243 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
244 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
245 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
246 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
247 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
248 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
/* Per-CU (CU0..CU7) CGTS SP/LDS/TA/TD-TCP control programming. */
249 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
250 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
251 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
252 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
253 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
254 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
255 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
256 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
257 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
258 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
259 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
260 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
261 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
262 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
263 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
264 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
265 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
266 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
267 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
268 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
269 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
270 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
271 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
272 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
273 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
274 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
275 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
276 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
277 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
278 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
279 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
280 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
281 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
282 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
283 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
284 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
285 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
286 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
287 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
288 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
289 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
290 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
291 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
292 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
/* Polaris11 A11 golden settings ({reg, mask, value} triples), applied in
 * gfx_v8_0_init_golden_registers().
 */
295 static const u32 golden_settings_polaris11_a11[] =
297 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
298 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
299 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
300 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
301 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
302 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
303 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
304 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
305 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
306 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
307 mmSQ_CONFIG, 0x07f80000, 0x01180000,
308 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
309 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
310 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
311 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
312 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
313 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/* Polaris11 common golden registers; GB_ADDR_CONFIG value 0x22011002
 * matches POLARIS11_GB_ADDR_CONFIG_GOLDEN.
 */
316 static const u32 polaris11_golden_common_all[] =
318 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
319 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
320 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
321 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
322 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
323 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
/* Polaris10 A11 golden settings ({reg, mask, value} triples). */
326 static const u32 golden_settings_polaris10_a11[] =
328 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
329 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
330 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
331 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
332 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
333 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
334 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
335 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
336 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
337 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
338 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
339 mmSQ_CONFIG, 0x07f80000, 0x07180000,
340 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
341 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
342 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
343 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
344 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/* Polaris10 common golden registers; GB_ADDR_CONFIG 0x22011003 (same value
 * as TONGA_GB_ADDR_CONFIG_GOLDEN).
 */
347 static const u32 polaris10_golden_common_all[] =
349 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
350 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
351 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
352 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
353 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
354 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
355 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
356 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
/* Fiji common golden registers ({reg, mask, value} triples). */
359 static const u32 fiji_golden_common_all[] =
361 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
362 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
363 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
364 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
365 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
366 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
367 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
368 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
369 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
370 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
/* Fiji A10 golden settings ({reg, mask, value} triples). */
373 static const u32 golden_settings_fiji_a10[] =
375 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
376 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
377 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
378 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
379 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
380 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
381 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
382 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
383 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
384 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
385 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/* Fiji MGCG/CGCG clock-gating init sequence ({reg, mask, value} triples);
 * same shape as tonga_mgcg_cgcg_init but without the per-CU CGTS entries.
 */
388 static const u32 fiji_mgcg_cgcg_init[] =
390 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
391 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
392 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
393 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
394 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
395 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
396 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
397 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
398 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
399 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
400 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
401 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
402 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
403 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
404 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
405 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
406 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
407 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
408 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
409 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
410 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
411 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
412 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
413 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
414 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
415 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
416 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
417 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
418 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
419 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
420 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
421 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
422 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
423 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
424 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
/* Iceland (Topaz) A11 golden settings ({reg, mask, value} triples). */
427 static const u32 golden_settings_iceland_a11[] =
429 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
430 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
431 mmDB_DEBUG3, 0xc0000000, 0xc0000000,
432 mmGB_GPU_ID, 0x0000000f, 0x00000000,
433 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
434 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
435 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
436 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
437 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
438 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
439 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
440 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
441 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
442 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
443 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
444 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
/* Iceland common golden registers; GB_ADDR_CONFIG 0x22010001 matches
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN.
 */
447 static const u32 iceland_golden_common_all[] =
449 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
450 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
451 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
452 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
453 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
454 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
455 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
456 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
/* Iceland MGCG/CGCG clock-gating init sequence ({reg, mask, value}
 * triples); includes per-CU CGTS entries for CU0..CU5 only.
 */
459 static const u32 iceland_mgcg_cgcg_init[] =
461 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
462 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
463 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
464 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
465 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
466 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
467 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
468 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
469 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
470 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
471 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
472 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
473 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
474 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
475 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
476 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
477 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
478 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
479 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
480 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
481 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
482 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
483 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
484 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
485 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
486 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
487 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
488 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
489 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
490 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
491 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
492 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
493 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
494 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
495 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
496 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
497 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
498 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
499 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
500 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
501 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
502 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
503 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
504 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
505 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
506 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
507 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
508 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
509 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
510 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
511 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
512 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
513 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
514 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
515 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
516 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
517 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
518 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
519 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
520 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
521 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
522 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
523 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
524 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
/* Carrizo A11 golden settings ({reg, mask, value} triples). */
527 static const u32 cz_golden_settings_a11[] =
529 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
530 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
531 mmGB_GPU_ID, 0x0000000f, 0x00000000,
532 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
533 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
534 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
535 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
536 mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
537 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
538 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
/* NOTE(review): value 0xf3 exceeds the 0x0f mask here; only the masked
 * bits take effect — confirm against amdgpu_program_register_sequence(). */
539 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
540 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
/* Carrizo common golden registers; GB_ADDR_CONFIG 0x22010001 matches
 * CARRIZO_GB_ADDR_CONFIG_GOLDEN.
 */
543 static const u32 cz_golden_common_all[] =
545 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
546 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
547 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
548 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
549 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
550 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
551 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
552 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
/* Carrizo MGCG/CGCG clock-gating init sequence ({reg, mask, value}
 * triples); includes per-CU CGTS entries for CU0..CU7.
 */
555 static const u32 cz_mgcg_cgcg_init[] =
557 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
558 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
559 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
560 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
561 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
562 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
563 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
564 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
565 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
566 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
567 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
568 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
569 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
570 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
571 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
572 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
573 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
574 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
575 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
576 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
577 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
578 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
579 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
580 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
581 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
582 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
583 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
584 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
585 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
586 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
587 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
588 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
589 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
590 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
591 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
592 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
593 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
594 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
595 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
596 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
597 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
598 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
599 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
600 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
601 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
602 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
603 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
604 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
605 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
606 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
607 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
608 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
609 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
610 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
611 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
612 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
613 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
614 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
615 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
616 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
617 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
618 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
619 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
620 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
621 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
622 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
623 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
624 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
625 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
626 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
627 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
628 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
629 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
630 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
631 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
/* Stoney A11 golden settings ({reg, mask, value} triples). */
634 static const u32 stoney_golden_settings_a11[] =
636 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
637 mmGB_GPU_ID, 0x0000000f, 0x00000000,
638 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
639 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
640 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
641 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
642 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
643 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
644 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
645 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
/* Stoney common golden registers ({reg, mask, value} triples). */
648 static const u32 stoney_golden_common_all[] =
650 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
651 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
652 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
653 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
654 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
655 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
656 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
657 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
/* Stoney MGCG/CGCG init ({reg, mask, value} triples) — much shorter than
 * the other ASICs' sequences.
 */
660 static const u32 stoney_mgcg_cgcg_init[] =
662 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
663 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
664 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
665 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
666 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
/* Forward declarations for functions defined later in this file. */
669 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
670 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
671 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
672 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
673 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
674 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
675 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
676 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
/**
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 * @adev: amdgpu device pointer
 *
 * Dispatches on adev->asic_type and applies the matching mgcg/cgcg init,
 * golden-settings and golden-common register sequences (defined above) via
 * amdgpu_program_register_sequence().
 */
678 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
680 switch (adev->asic_type) {
/* Iceland/Topaz */
682 amdgpu_program_register_sequence(adev,
683 iceland_mgcg_cgcg_init,
684 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
685 amdgpu_program_register_sequence(adev,
686 golden_settings_iceland_a11,
687 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
688 amdgpu_program_register_sequence(adev,
689 iceland_golden_common_all,
690 (const u32)ARRAY_SIZE(iceland_golden_common_all));
/* Fiji */
693 amdgpu_program_register_sequence(adev,
695 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
696 amdgpu_program_register_sequence(adev,
697 golden_settings_fiji_a10,
698 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
699 amdgpu_program_register_sequence(adev,
700 fiji_golden_common_all,
701 (const u32)ARRAY_SIZE(fiji_golden_common_all));
/* Tonga */
705 amdgpu_program_register_sequence(adev,
706 tonga_mgcg_cgcg_init,
707 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
708 amdgpu_program_register_sequence(adev,
709 golden_settings_tonga_a11,
710 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
711 amdgpu_program_register_sequence(adev,
712 tonga_golden_common_all,
713 (const u32)ARRAY_SIZE(tonga_golden_common_all));
/* Polaris11 (tables also used for Polaris12 upstream — confirm case labels) */
717 amdgpu_program_register_sequence(adev,
718 golden_settings_polaris11_a11,
719 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
720 amdgpu_program_register_sequence(adev,
721 polaris11_golden_common_all,
722 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
/* Polaris10 */
725 amdgpu_program_register_sequence(adev,
726 golden_settings_polaris10_a11,
727 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
728 amdgpu_program_register_sequence(adev,
729 polaris10_golden_common_all,
730 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
731 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
/* Board-specific quirk: for rev 0xc7 cards with these subsystem IDs,
 * issue two I2C transactions (NOTE(review): presumably a VBIOS/board
 * workaround — exact purpose not visible here).
 */
732 if (adev->pdev->revision == 0xc7 &&
733 ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
734 (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
735 (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
736 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
737 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
/* Carrizo */
741 amdgpu_program_register_sequence(adev,
743 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
744 amdgpu_program_register_sequence(adev,
745 cz_golden_settings_a11,
746 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
747 amdgpu_program_register_sequence(adev,
748 cz_golden_common_all,
749 (const u32)ARRAY_SIZE(cz_golden_common_all));
/* Stoney */
752 amdgpu_program_register_sequence(adev,
753 stoney_mgcg_cgcg_init,
754 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
755 amdgpu_program_register_sequence(adev,
756 stoney_golden_settings_a11,
757 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
758 amdgpu_program_register_sequence(adev,
759 stoney_golden_common_all,
760 (const u32)ARRAY_SIZE(stoney_golden_common_all));
/*
 * gfx_v8_0_scratch_init - initialize the CP scratch register pool.
 *
 * Exposes 8 scratch registers starting at mmSCRATCH_REG0.  free_mask has
 * one bit per register; all bits start set (register free).
 */
767 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
769 adev->gfx.scratch.num_reg = 8;
770 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
771 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
/*
 * gfx_v8_0_ring_test_ring - basic sanity test of a GFX/compute ring.
 *
 * Seeds a scratch register with 0xCAFEDEAD, emits a 3-dword
 * SET_UCONFIG_REG packet on the ring that stores 0xDEADBEEF to that
 * register, commits the ring, then polls (1 us granularity, up to
 * adev->usec_timeout) for the new value.  The scratch register is freed
 * on all paths.  Returns 0 on success, negative errno on failure.
 */
774 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
776 struct amdgpu_device *adev = ring->adev;
782 r = amdgpu_gfx_scratch_get(adev, &scratch);
784 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
/* seed with a value the packet below must overwrite */
787 WREG32(scratch, 0xCAFEDEAD);
788 r = amdgpu_ring_alloc(ring, 3);
790 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
792 amdgpu_gfx_scratch_free(adev, scratch);
795 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
796 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
797 amdgpu_ring_write(ring, 0xDEADBEEF);
798 amdgpu_ring_commit(ring);
/* busy-wait for the CP to execute the register write */
800 for (i = 0; i < adev->usec_timeout; i++) {
801 tmp = RREG32(scratch);
802 if (tmp == 0xDEADBEEF)
806 if (i < adev->usec_timeout) {
807 DRM_INFO("ring test on %d succeeded in %d usecs\n",
810 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
811 ring->idx, scratch, tmp);
814 amdgpu_gfx_scratch_free(adev, scratch);
/*
 * gfx_v8_0_ring_test_ib - sanity test of indirect buffer (IB) execution.
 *
 * Builds a 3-dword IB that writes 0xDEADBEEF to a scratch register
 * pre-seeded with 0xCAFEDEAD, schedules it on @ring, waits on the
 * resulting fence with @timeout, then checks the scratch value.  The IB
 * and scratch register are released on all paths.  Returns 0 on success,
 * negative error code on scheduling/fence/readback failure.
 */
818 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
820 struct amdgpu_device *adev = ring->adev;
822 struct dma_fence *f = NULL;
827 r = amdgpu_gfx_scratch_get(adev, &scratch);
829 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
/* seed with a value the IB below must overwrite */
832 WREG32(scratch, 0xCAFEDEAD);
833 memset(&ib, 0, sizeof(ib));
834 r = amdgpu_ib_get(adev, NULL, 256, &ib);
836 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
839 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
840 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
841 ib.ptr[2] = 0xDEADBEEF;
844 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
848 r = dma_fence_wait_timeout(f, false, timeout);
850 DRM_ERROR("amdgpu: IB test timed out.\n");
854 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
857 tmp = RREG32(scratch);
858 if (tmp == 0xDEADBEEF) {
859 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
862 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
867 amdgpu_ib_free(adev, &ib, NULL);
870 amdgpu_gfx_scratch_free(adev, scratch);
/*
 * gfx_v8_0_free_microcode - release all GFX firmware images and the RLC
 * register list buffer allocated by gfx_v8_0_init_microcode().
 */
875 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
877 release_firmware(adev->gfx.pfp_fw);
878 adev->gfx.pfp_fw = NULL;
879 release_firmware(adev->gfx.me_fw);
880 adev->gfx.me_fw = NULL;
881 release_firmware(adev->gfx.ce_fw);
882 adev->gfx.ce_fw = NULL;
883 release_firmware(adev->gfx.rlc_fw);
884 adev->gfx.rlc_fw = NULL;
885 release_firmware(adev->gfx.mec_fw);
886 adev->gfx.mec_fw = NULL;
/* Stoney/Topaz never request a MEC2 image (mirrors init_microcode), so
 * the release is skipped for them; mec2_fw is cleared unconditionally.
 */
887 if ((adev->asic_type != CHIP_STONEY) &&
888 (adev->asic_type != CHIP_TOPAZ))
889 release_firmware(adev->gfx.mec2_fw);
890 adev->gfx.mec2_fw = NULL;
892 kfree(adev->gfx.rlc.register_list_format);
/*
 * gfx_v8_0_init_microcode - request and validate all GFX firmware images
 * (PFP, ME, CE, RLC, MEC, and MEC2 where the ASIC has one).
 *
 * Polaris parts first try the "_2" firmware filenames and fall back to
 * the legacy names on -ENOENT.  Version/feature fields are converted
 * from little-endian headers and cached in adev->gfx; the RLC register
 * list format and restore tables are copied out of the RLC image into a
 * single kmalloc'd buffer.  When firmware is loaded by the SMU
 * (AMDGPU_FW_LOAD_SMU), each image is additionally registered in
 * adev->firmware.ucode[] and adev->firmware.fw_size is accumulated in
 * page-aligned chunks.  On failure all acquired images are released.
 * Returns 0 on success, negative errno otherwise.
 */
895 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
897 const char *chip_name;
900 struct amdgpu_firmware_info *info = NULL;
901 const struct common_firmware_header *header = NULL;
902 const struct gfx_firmware_header_v1_0 *cp_hdr;
903 const struct rlc_firmware_header_v2_0 *rlc_hdr;
904 unsigned int *tmp = NULL, i;
908 switch (adev->asic_type) {
916 chip_name = "carrizo";
922 chip_name = "polaris11";
925 chip_name = "polaris10";
928 chip_name = "polaris12";
931 chip_name = "stoney";
/* PFP: Polaris tries the "_2" image first, falls back on -ENOENT */
937 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
938 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
939 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
940 if (err == -ENOENT) {
941 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
942 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
945 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
946 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
950 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
953 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
954 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
955 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* ME */
957 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
958 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
959 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
960 if (err == -ENOENT) {
961 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
962 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
965 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
966 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
970 err = amdgpu_ucode_validate(adev->gfx.me_fw);
973 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
974 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
976 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* CE */
978 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
979 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
980 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
981 if (err == -ENOENT) {
982 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
983 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
986 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
987 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
991 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
994 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
995 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
996 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
999 * Support for MCBP/Virtualization in combination with chained IBs is
1000 * formally released with feature version #46
1002 if (adev->gfx.ce_feature_version >= 46 &&
1003 adev->gfx.pfp_feature_version >= 46) {
1004 adev->virt.chained_ib_support = true;
1005 DRM_INFO("Chained IB support enabled!\n");
1007 adev->virt.chained_ib_support = false;
/* RLC: single image name, no "_2" variant */
1009 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1010 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1013 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1014 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1015 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1016 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
/* cache the RLC save/restore layout advertised by the firmware header */
1018 adev->gfx.rlc.save_and_restore_offset =
1019 le32_to_cpu(rlc_hdr->save_and_restore_offset);
1020 adev->gfx.rlc.clear_state_descriptor_offset =
1021 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1022 adev->gfx.rlc.avail_scratch_ram_locations =
1023 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1024 adev->gfx.rlc.reg_restore_list_size =
1025 le32_to_cpu(rlc_hdr->reg_restore_list_size);
1026 adev->gfx.rlc.reg_list_format_start =
1027 le32_to_cpu(rlc_hdr->reg_list_format_start);
1028 adev->gfx.rlc.reg_list_format_separate_start =
1029 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1030 adev->gfx.rlc.starting_offsets_start =
1031 le32_to_cpu(rlc_hdr->starting_offsets_start);
1032 adev->gfx.rlc.reg_list_format_size_bytes =
1033 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1034 adev->gfx.rlc.reg_list_size_bytes =
1035 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
/* single allocation holds both the format list and the restore list;
 * register_restore points just past the format entries below
 */
1037 adev->gfx.rlc.register_list_format =
1038 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1039 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1041 if (!adev->gfx.rlc.register_list_format) {
1046 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1047 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1048 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1049 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1051 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1053 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1054 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1055 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1056 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
/* MEC */
1058 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1059 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1060 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1061 if (err == -ENOENT) {
1062 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1063 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1066 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1067 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1071 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1074 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1075 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1076 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* MEC2: Stoney and Topaz have no second MEC image */
1078 if ((adev->asic_type != CHIP_STONEY) &&
1079 (adev->asic_type != CHIP_TOPAZ)) {
1080 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1081 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1082 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1083 if (err == -ENOENT) {
1084 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1085 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1088 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1089 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1092 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1095 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1096 adev->gfx.mec2_fw->data;
1097 adev->gfx.mec2_fw_version =
1098 le32_to_cpu(cp_hdr->header.ucode_version);
1099 adev->gfx.mec2_feature_version =
1100 le32_to_cpu(cp_hdr->ucode_feature_version);
1103 adev->gfx.mec2_fw = NULL;
/* register the images with the common firmware framework when the SMU
 * is responsible for loading them
 */
1107 if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1108 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1109 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1110 info->fw = adev->gfx.pfp_fw;
1111 header = (const struct common_firmware_header *)info->fw->data;
1112 adev->firmware.fw_size +=
1113 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1115 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1116 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1117 info->fw = adev->gfx.me_fw;
1118 header = (const struct common_firmware_header *)info->fw->data;
1119 adev->firmware.fw_size +=
1120 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1122 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1123 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1124 info->fw = adev->gfx.ce_fw;
1125 header = (const struct common_firmware_header *)info->fw->data;
1126 adev->firmware.fw_size +=
1127 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1129 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1130 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1131 info->fw = adev->gfx.rlc_fw;
1132 header = (const struct common_firmware_header *)info->fw->data;
1133 adev->firmware.fw_size +=
1134 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1136 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1137 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1138 info->fw = adev->gfx.mec_fw;
1139 header = (const struct common_firmware_header *)info->fw->data;
1140 adev->firmware.fw_size +=
1141 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1143 /* also account for the MEC jump table (JT), sized in dwords */
1144 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1145 adev->firmware.fw_size +=
1146 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1148 if (amdgpu_sriov_vf(adev)) {
1149 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1150 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1151 info->fw = adev->gfx.mec_fw;
1152 adev->firmware.fw_size +=
1153 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1156 if (adev->gfx.mec2_fw) {
1157 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1158 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1159 info->fw = adev->gfx.mec2_fw;
1160 header = (const struct common_firmware_header *)info->fw->data;
1161 adev->firmware.fw_size +=
1162 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
/* error path: drop every image we may have acquired */
1170 "gfx8: Failed to load firmware \"%s\"\n",
1172 release_firmware(adev->gfx.pfp_fw);
1173 adev->gfx.pfp_fw = NULL;
1174 release_firmware(adev->gfx.me_fw);
1175 adev->gfx.me_fw = NULL;
1176 release_firmware(adev->gfx.ce_fw);
1177 adev->gfx.ce_fw = NULL;
1178 release_firmware(adev->gfx.rlc_fw);
1179 adev->gfx.rlc_fw = NULL;
1180 release_firmware(adev->gfx.mec_fw);
1181 adev->gfx.mec_fw = NULL;
1182 release_firmware(adev->gfx.mec2_fw);
1183 adev->gfx.mec2_fw = NULL;
/*
 * gfx_v8_0_get_csb_buffer - emit the clear-state PM4 stream into @buffer.
 *
 * Writes, in little-endian dwords: PREAMBLE_BEGIN_CLEAR_STATE, a
 * CONTEXT_CONTROL packet, all SECT_CONTEXT register extents from
 * adev->gfx.rlc.cs_data, the raster config pair for SE0/SH0,
 * PREAMBLE_END_CLEAR_STATE and a final CLEAR_STATE packet.  The buffer
 * must be at least gfx_v8_0_get_csb_size() dwords.  No-op when cs_data
 * is NULL.
 */
1188 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1189 volatile u32 *buffer)
1192 const struct cs_section_def *sect = NULL;
1193 const struct cs_extent_def *ext = NULL;
1195 if (adev->gfx.rlc.cs_data == NULL)
1200 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1201 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1203 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1204 buffer[count++] = cpu_to_le32(0x80000000);
1205 buffer[count++] = cpu_to_le32(0x80000000);
/* copy every context-register extent from the clear-state tables */
1207 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1208 for (ext = sect->section; ext->extent != NULL; ++ext) {
1209 if (sect->id == SECT_CONTEXT) {
1211 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1212 buffer[count++] = cpu_to_le32(ext->reg_index -
1213 PACKET3_SET_CONTEXT_REG_START);
1214 for (i = 0; i < ext->reg_count; i++)
1215 buffer[count++] = cpu_to_le32(ext->extent[i]);
/* PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 are consecutive regs */
1222 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1223 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1224 PACKET3_SET_CONTEXT_REG_START);
1225 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1226 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1228 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1229 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1231 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1232 buffer[count++] = cpu_to_le32(0);
/*
 * cz_init_cp_jump_table - copy the CP jump tables into the RLC cp table BO.
 *
 * For each micro engine (me 0..max_me-1: CE, PFP, ME, MEC, and MEC2 for
 * me == 4) the jump table (jt_offset/jt_size from the firmware header)
 * is copied, dword by dword, into consecutive regions of
 * adev->gfx.rlc.cp_table_ptr.
 *
 * NOTE(review): max_me starts at 4; the CHIP_CARRIZO branch below
 * presumably raises it to 5 so the me == 4 (MEC2) case is reachable —
 * confirm against the elided statement.
 */
1235 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1237 const __le32 *fw_data;
1238 volatile u32 *dst_ptr;
1239 int me, i, max_me = 4;
1241 u32 table_offset, table_size;
1243 if (adev->asic_type == CHIP_CARRIZO)
1246 /* write the cp table buffer */
1247 dst_ptr = adev->gfx.rlc.cp_table_ptr;
1248 for (me = 0; me < max_me; me++) {
/* me == 0: CE jump table */
1250 const struct gfx_firmware_header_v1_0 *hdr =
1251 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1252 fw_data = (const __le32 *)
1253 (adev->gfx.ce_fw->data +
1254 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1255 table_offset = le32_to_cpu(hdr->jt_offset);
1256 table_size = le32_to_cpu(hdr->jt_size);
1257 } else if (me == 1) {
/* PFP jump table */
1258 const struct gfx_firmware_header_v1_0 *hdr =
1259 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1260 fw_data = (const __le32 *)
1261 (adev->gfx.pfp_fw->data +
1262 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1263 table_offset = le32_to_cpu(hdr->jt_offset);
1264 table_size = le32_to_cpu(hdr->jt_size);
1265 } else if (me == 2) {
/* ME jump table */
1266 const struct gfx_firmware_header_v1_0 *hdr =
1267 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1268 fw_data = (const __le32 *)
1269 (adev->gfx.me_fw->data +
1270 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1271 table_offset = le32_to_cpu(hdr->jt_offset);
1272 table_size = le32_to_cpu(hdr->jt_size);
1273 } else if (me == 3) {
/* MEC jump table */
1274 const struct gfx_firmware_header_v1_0 *hdr =
1275 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1276 fw_data = (const __le32 *)
1277 (adev->gfx.mec_fw->data +
1278 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1279 table_offset = le32_to_cpu(hdr->jt_offset);
1280 table_size = le32_to_cpu(hdr->jt_size);
1281 } else if (me == 4) {
/* MEC2 jump table */
1282 const struct gfx_firmware_header_v1_0 *hdr =
1283 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1284 fw_data = (const __le32 *)
1285 (adev->gfx.mec2_fw->data +
1286 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1287 table_offset = le32_to_cpu(hdr->jt_offset);
1288 table_size = le32_to_cpu(hdr->jt_size);
1291 for (i = 0; i < table_size; i ++) {
1292 dst_ptr[bo_offset + i] =
1293 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1296 bo_offset += table_size;
/*
 * gfx_v8_0_rlc_fini - free the RLC clear-state and CP-table buffer
 * objects created by gfx_v8_0_rlc_init().
 */
1300 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1302 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
1303 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
/*
 * gfx_v8_0_rlc_init - allocate and populate the RLC buffers.
 *
 * Creates a VRAM BO sized by gfx_v8_0_get_csb_size() and fills it with
 * the clear-state PM4 stream via gfx_v8_0_get_csb_buffer().  On
 * Carrizo/Stoney it additionally allocates the CP table BO (JT + GDS
 * area) and writes the jump tables with cz_init_cp_jump_table().  Both
 * BOs are kunmapped/unreserved after the CPU writes.  Returns 0 on
 * success, negative error code on BO creation failure (the first
 * failure also tears down via gfx_v8_0_rlc_fini()).
 */
1306 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1308 volatile u32 *dst_ptr;
1310 const struct cs_section_def *cs_data;
1313 adev->gfx.rlc.cs_data = vi_cs_data;
1315 cs_data = adev->gfx.rlc.cs_data;
1318 /* clear state block */
1319 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1321 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1322 AMDGPU_GEM_DOMAIN_VRAM,
1323 &adev->gfx.rlc.clear_state_obj,
1324 &adev->gfx.rlc.clear_state_gpu_addr,
1325 (void **)&adev->gfx.rlc.cs_ptr);
1327 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1328 gfx_v8_0_rlc_fini(adev);
1332 /* set up the cs buffer */
1333 dst_ptr = adev->gfx.rlc.cs_ptr;
1334 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1335 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1336 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
/* Carrizo/Stoney also need a CP jump table + GDS backup buffer */
1339 if ((adev->asic_type == CHIP_CARRIZO) ||
1340 (adev->asic_type == CHIP_STONEY)) {
1341 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1342 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1343 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1344 &adev->gfx.rlc.cp_table_obj,
1345 &adev->gfx.rlc.cp_table_gpu_addr,
1346 (void **)&adev->gfx.rlc.cp_table_ptr);
1348 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1352 cz_init_cp_jump_table(adev);
1354 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1355 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
/* gfx_v8_0_mec_fini - free the MEC HPD EOP buffer object. */
1361 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1363 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
/*
 * gfx_v8_0_mec_init - allocate the MEC HPD EOP buffer.
 *
 * Claims the compute queues owned by the kernel driver, then creates a
 * zeroed GTT BO of GFX8_MEC_HPD_SIZE bytes per compute ring for the HPD
 * EOP areas.  Returns 0 on success, negative errno on BO creation
 * failure.
 */
1366 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1370 size_t mec_hpd_size;
1372 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1374 /* take ownership of the relevant compute queues */
1375 amdgpu_gfx_compute_queue_acquire(adev);
1377 mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1379 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1380 AMDGPU_GEM_DOMAIN_GTT,
1381 &adev->gfx.mec.hpd_eop_obj,
1382 &adev->gfx.mec.hpd_eop_gpu_addr,
1385 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1389 memset(hpd, 0, mec_hpd_size);
1391 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1392 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
/*
 * Hand-assembled GCN ISA dwords used by gfx_v8_0_do_edc_gpr_workarounds()
 * to initialize the VGPR file (writes through v0..v63).
 * NOTE(review): raw opcode encodings; the trailing pair appears to be
 * s_barrier / s_endpgm — confirm against the GCN3 ISA manual.
 */
1397 static const u32 vgpr_init_compute_shader[] =
1399 0x7e000209, 0x7e020208,
1400 0x7e040207, 0x7e060206,
1401 0x7e080205, 0x7e0a0204,
1402 0x7e0c0203, 0x7e0e0202,
1403 0x7e100201, 0x7e120200,
1404 0x7e140209, 0x7e160208,
1405 0x7e180207, 0x7e1a0206,
1406 0x7e1c0205, 0x7e1e0204,
1407 0x7e200203, 0x7e220202,
1408 0x7e240201, 0x7e260200,
1409 0x7e280209, 0x7e2a0208,
1410 0x7e2c0207, 0x7e2e0206,
1411 0x7e300205, 0x7e320204,
1412 0x7e340203, 0x7e360202,
1413 0x7e380201, 0x7e3a0200,
1414 0x7e3c0209, 0x7e3e0208,
1415 0x7e400207, 0x7e420206,
1416 0x7e440205, 0x7e460204,
1417 0x7e480203, 0x7e4a0202,
1418 0x7e4c0201, 0x7e4e0200,
1419 0x7e500209, 0x7e520208,
1420 0x7e540207, 0x7e560206,
1421 0x7e580205, 0x7e5a0204,
1422 0x7e5c0203, 0x7e5e0202,
1423 0x7e600201, 0x7e620200,
1424 0x7e640209, 0x7e660208,
1425 0x7e680207, 0x7e6a0206,
1426 0x7e6c0205, 0x7e6e0204,
1427 0x7e700203, 0x7e720202,
1428 0x7e740201, 0x7e760200,
1429 0x7e780209, 0x7e7a0208,
1430 0x7e7c0207, 0x7e7e0206,
1431 0xbf8a0000, 0xbf810000,
/*
 * Hand-assembled GCN ISA dwords used by gfx_v8_0_do_edc_gpr_workarounds()
 * to initialize the SGPR file.  NOTE(review): raw opcode encodings.
 */
1434 static const u32 sgpr_init_compute_shader[] =
1436 0xbe8a0100, 0xbe8c0102,
1437 0xbe8e0104, 0xbe900106,
1438 0xbe920108, 0xbe940100,
1439 0xbe960102, 0xbe980104,
1440 0xbe9a0106, 0xbe9c0108,
1441 0xbe9e0100, 0xbea00102,
1442 0xbea20104, 0xbea40106,
1443 0xbea60108, 0xbea80100,
1444 0xbeaa0102, 0xbeac0104,
1445 0xbeae0106, 0xbeb00108,
1446 0xbeb20100, 0xbeb40102,
1447 0xbeb60104, 0xbeb80106,
1448 0xbeba0108, 0xbebc0100,
1449 0xbebe0102, 0xbec00104,
1450 0xbec20106, 0xbec40108,
1451 0xbec60100, 0xbec80102,
1452 0xbee60004, 0xbee70005,
1453 0xbeea0006, 0xbeeb0007,
1454 0xbee80008, 0xbee90009,
1455 0xbefc0000, 0xbf8a0000,
1456 0xbf810000, 0x00000000,
/*
 * Register/value pairs (consumed two at a time by the dispatch-setup
 * loop in gfx_v8_0_do_edc_gpr_workarounds()) configuring the VGPR-init
 * compute dispatch: all SE0 CUs enabled, 1024 threads (256*4 x 1 x 1).
 */
1459 static const u32 vgpr_init_regs[] =
1461 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1462 mmCOMPUTE_RESOURCE_LIMITS, 0,
1463 mmCOMPUTE_NUM_THREAD_X, 256*4,
1464 mmCOMPUTE_NUM_THREAD_Y, 1,
1465 mmCOMPUTE_NUM_THREAD_Z, 1,
1466 mmCOMPUTE_PGM_RSRC2, 20,
1467 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1468 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1469 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1470 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1471 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1472 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1473 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1474 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1475 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1476 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * Register/value pairs for the first SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(): SE0 CU mask 0x0f (lower CUs),
 * 1280 threads (256*5 x 1 x 1).
 */
1479 static const u32 sgpr1_init_regs[] =
1481 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1482 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1483 mmCOMPUTE_NUM_THREAD_X, 256*5,
1484 mmCOMPUTE_NUM_THREAD_Y, 1,
1485 mmCOMPUTE_NUM_THREAD_Z, 1,
1486 mmCOMPUTE_PGM_RSRC2, 20,
1487 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1488 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1489 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1490 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1491 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1492 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1493 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1494 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1495 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1496 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * Register/value pairs for the second SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(): SE0 CU mask 0xf0 (upper CUs),
 * otherwise identical to sgpr1_init_regs.
 */
1499 static const u32 sgpr2_init_regs[] =
1501 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1502 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1503 mmCOMPUTE_NUM_THREAD_X, 256*5,
1504 mmCOMPUTE_NUM_THREAD_Y, 1,
1505 mmCOMPUTE_NUM_THREAD_Z, 1,
1506 mmCOMPUTE_PGM_RSRC2, 20,
1507 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1508 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1509 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1510 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1511 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1512 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1513 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1514 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1515 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1516 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * EDC SEC/DED error-counter registers.  Read back (which clears them)
 * at the end of gfx_v8_0_do_edc_gpr_workarounds().
 */
1519 static const u32 sec_ded_counter_registers[] =
1522 mmCPC_EDC_SCRATCH_CNT,
1523 mmCPC_EDC_UCODE_CNT,
1530 mmDC_EDC_CSINVOC_CNT,
1531 mmDC_EDC_RESTORE_CNT,
1537 mmSQC_ATC_EDC_GATCL1_CNT,
1543 mmTCP_ATC_EDC_GATCL1_CNT,
/*
 * gfx_v8_0_do_edc_gpr_workarounds - initialize GPRs so EDC reports no
 * spurious errors (Carrizo only).
 *
 * Builds one IB containing three compute dispatches — the VGPR-init
 * shader once and the SGPR-init shader twice (lower then upper CU
 * halves, per sgpr1/sgpr2_init_regs) — each followed by a CS partial
 * flush.  The shader code itself is placed in the same IB after the
 * packets (at vgpr_offset/sgpr_offset).  After the fence signals, EDC
 * DED/PROP_FED modes are programmed and the SEC/DED counters are read
 * back to clear them.  Returns 0 on success, negative errno on IB
 * allocation/scheduling/fence failure.
 */
1548 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1550 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1551 struct amdgpu_ib ib;
1552 struct dma_fence *f = NULL;
1555 unsigned total_size, vgpr_offset, sgpr_offset;
1558 /* only supported on CZ */
1559 if (adev->asic_type != CHIP_CARRIZO)
1562 /* bail if the compute ring is not ready */
/* save current EDC mode and disable it while the IB runs */
1566 tmp = RREG32(mmGB_EDC_MODE);
1567 WREG32(mmGB_EDC_MODE, 0);
/* size the packet stream: per dispatch, SET_SH_REG triples plus the
 * PGM address (4), DISPATCH_DIRECT (5) and EVENT_WRITE (2) dwords
 */
1570 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1572 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1574 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1575 total_size = ALIGN(total_size, 256);
1576 vgpr_offset = total_size;
1577 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1578 sgpr_offset = total_size;
1579 total_size += sizeof(sgpr_init_compute_shader);
1581 /* allocate an indirect buffer to put the commands in */
1582 memset(&ib, 0, sizeof(ib));
1583 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1585 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1589 /* load the compute shaders */
1590 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1591 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1593 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1594 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1596 /* init the ib length to 0 */
/* --- dispatch 1: VGPR init --- */
1600 /* write the register state for the compute dispatch */
1601 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1602 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1603 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1604 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1606 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1607 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1608 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1609 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1610 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1611 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1613 /* write dispatch packet */
1614 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1615 ib.ptr[ib.length_dw++] = 8; /* x */
1616 ib.ptr[ib.length_dw++] = 1; /* y */
1617 ib.ptr[ib.length_dw++] = 1; /* z */
1618 ib.ptr[ib.length_dw++] =
1619 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1621 /* write CS partial flush packet */
1622 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1623 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* --- dispatch 2: SGPR init, lower CU half --- */
1626 /* write the register state for the compute dispatch */
1627 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1628 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1629 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1630 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1632 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1633 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1634 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1635 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1636 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1637 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1639 /* write dispatch packet */
1640 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1641 ib.ptr[ib.length_dw++] = 8; /* x */
1642 ib.ptr[ib.length_dw++] = 1; /* y */
1643 ib.ptr[ib.length_dw++] = 1; /* z */
1644 ib.ptr[ib.length_dw++] =
1645 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1647 /* write CS partial flush packet */
1648 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1649 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* --- dispatch 3: SGPR init, upper CU half --- */
1652 /* write the register state for the compute dispatch */
1653 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1654 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1655 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1656 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1658 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1659 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1660 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1661 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1662 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1663 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1665 /* write dispatch packet */
1666 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1667 ib.ptr[ib.length_dw++] = 8; /* x */
1668 ib.ptr[ib.length_dw++] = 1; /* y */
1669 ib.ptr[ib.length_dw++] = 1; /* z */
1670 ib.ptr[ib.length_dw++] =
1671 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1673 /* write CS partial flush packet */
1674 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1675 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1677 /* schedule the IB on the ring */
1678 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1680 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1684 /* wait for the GPU to finish processing the IB */
1685 r = dma_fence_wait(f, false);
1687 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
/* restore/enable EDC with DED detection and FED propagation on */
1691 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1692 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1693 WREG32(mmGB_EDC_MODE, tmp);
1695 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1696 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1697 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1700 /* read back registers to clear the counters */
1701 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1702 RREG32(sec_ded_counter_registers[i]);
1705 amdgpu_ib_free(adev, &ib, NULL);
1711 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1714 u32 mc_shared_chmap, mc_arb_ramcfg;
1715 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1719 switch (adev->asic_type) {
1721 adev->gfx.config.max_shader_engines = 1;
1722 adev->gfx.config.max_tile_pipes = 2;
1723 adev->gfx.config.max_cu_per_sh = 6;
1724 adev->gfx.config.max_sh_per_se = 1;
1725 adev->gfx.config.max_backends_per_se = 2;
1726 adev->gfx.config.max_texture_channel_caches = 2;
1727 adev->gfx.config.max_gprs = 256;
1728 adev->gfx.config.max_gs_threads = 32;
1729 adev->gfx.config.max_hw_contexts = 8;
1731 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1732 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1733 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1734 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1735 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1738 adev->gfx.config.max_shader_engines = 4;
1739 adev->gfx.config.max_tile_pipes = 16;
1740 adev->gfx.config.max_cu_per_sh = 16;
1741 adev->gfx.config.max_sh_per_se = 1;
1742 adev->gfx.config.max_backends_per_se = 4;
1743 adev->gfx.config.max_texture_channel_caches = 16;
1744 adev->gfx.config.max_gprs = 256;
1745 adev->gfx.config.max_gs_threads = 32;
1746 adev->gfx.config.max_hw_contexts = 8;
1748 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1749 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1750 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1751 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1752 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1754 case CHIP_POLARIS11:
1755 case CHIP_POLARIS12:
1756 ret = amdgpu_atombios_get_gfx_info(adev);
1759 adev->gfx.config.max_gprs = 256;
1760 adev->gfx.config.max_gs_threads = 32;
1761 adev->gfx.config.max_hw_contexts = 8;
1763 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1764 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1765 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1766 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1767 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1769 case CHIP_POLARIS10:
1770 ret = amdgpu_atombios_get_gfx_info(adev);
1773 adev->gfx.config.max_gprs = 256;
1774 adev->gfx.config.max_gs_threads = 32;
1775 adev->gfx.config.max_hw_contexts = 8;
1777 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1778 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1779 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1780 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1781 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1784 adev->gfx.config.max_shader_engines = 4;
1785 adev->gfx.config.max_tile_pipes = 8;
1786 adev->gfx.config.max_cu_per_sh = 8;
1787 adev->gfx.config.max_sh_per_se = 1;
1788 adev->gfx.config.max_backends_per_se = 2;
1789 adev->gfx.config.max_texture_channel_caches = 8;
1790 adev->gfx.config.max_gprs = 256;
1791 adev->gfx.config.max_gs_threads = 32;
1792 adev->gfx.config.max_hw_contexts = 8;
1794 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1795 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1796 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1797 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1798 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1801 adev->gfx.config.max_shader_engines = 1;
1802 adev->gfx.config.max_tile_pipes = 2;
1803 adev->gfx.config.max_sh_per_se = 1;
1804 adev->gfx.config.max_backends_per_se = 2;
1805 adev->gfx.config.max_cu_per_sh = 8;
1806 adev->gfx.config.max_texture_channel_caches = 2;
1807 adev->gfx.config.max_gprs = 256;
1808 adev->gfx.config.max_gs_threads = 32;
1809 adev->gfx.config.max_hw_contexts = 8;
1811 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1812 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1813 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1814 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1815 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1818 adev->gfx.config.max_shader_engines = 1;
1819 adev->gfx.config.max_tile_pipes = 2;
1820 adev->gfx.config.max_sh_per_se = 1;
1821 adev->gfx.config.max_backends_per_se = 1;
1822 adev->gfx.config.max_cu_per_sh = 3;
1823 adev->gfx.config.max_texture_channel_caches = 2;
1824 adev->gfx.config.max_gprs = 256;
1825 adev->gfx.config.max_gs_threads = 16;
1826 adev->gfx.config.max_hw_contexts = 8;
1828 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1829 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1830 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1831 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1832 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1835 adev->gfx.config.max_shader_engines = 2;
1836 adev->gfx.config.max_tile_pipes = 4;
1837 adev->gfx.config.max_cu_per_sh = 2;
1838 adev->gfx.config.max_sh_per_se = 1;
1839 adev->gfx.config.max_backends_per_se = 2;
1840 adev->gfx.config.max_texture_channel_caches = 4;
1841 adev->gfx.config.max_gprs = 256;
1842 adev->gfx.config.max_gs_threads = 32;
1843 adev->gfx.config.max_hw_contexts = 8;
1845 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1846 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1847 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1848 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1849 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1853 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1854 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1855 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1857 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1858 adev->gfx.config.mem_max_burst_length_bytes = 256;
1859 if (adev->flags & AMD_IS_APU) {
1860 /* Get memory bank mapping mode. */
1861 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1862 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1863 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1865 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1866 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1867 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1869 /* Validate settings in case only one DIMM installed. */
1870 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1871 dimm00_addr_map = 0;
1872 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1873 dimm01_addr_map = 0;
1874 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1875 dimm10_addr_map = 0;
1876 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1877 dimm11_addr_map = 0;
1879 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1880 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1881 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1882 adev->gfx.config.mem_row_size_in_kb = 2;
1884 adev->gfx.config.mem_row_size_in_kb = 1;
1886 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1887 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1888 if (adev->gfx.config.mem_row_size_in_kb > 4)
1889 adev->gfx.config.mem_row_size_in_kb = 4;
1892 adev->gfx.config.shader_engine_tile_size = 32;
1893 adev->gfx.config.num_gpus = 1;
1894 adev->gfx.config.multi_gpu_tile_size = 64;
1896 /* fix up row size */
1897 switch (adev->gfx.config.mem_row_size_in_kb) {
1900 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1903 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1906 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1909 adev->gfx.config.gb_addr_config = gb_addr_config;
/*
 * gfx_v8_0_compute_ring_init() - software-side setup of one GFX8 compute ring.
 *
 * Fills in the amdgpu_ring bookkeeping for the compute ring at index
 * @ring_id, binding it to MEC @mec, pipe @pipe and hardware queue @queue,
 * then registers the ring with the common ring code via amdgpu_ring_init().
 * Returns the amdgpu_ring_init() result (0 on success, negative errno on
 * failure).
 */
1914 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1915 int mec, int pipe, int queue)
1919 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
/* NOTE(review): 'ring' was already initialized to
 * &adev->gfx.compute_ring[ring_id] above; this reassignment is redundant. */
1921 ring = &adev->gfx.compute_ring[ring_id];
1926 ring->queue = queue;
1928 ring->ring_obj = NULL;
/* Compute rings are submitted through doorbells: one doorbell per ring,
 * allocated consecutively from the first MEC doorbell slot. */
1929 ring->use_doorbell = true;
1930 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
/* Each ring owns a GFX8_MEC_HPD_SIZE-byte slice of the shared MEC
 * EOP (end-of-pipe) buffer. */
1931 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1932 + (ring_id * GFX8_MEC_HPD_SIZE);
/* Ring name encodes me.pipe.queue, e.g. "comp_1.0.0". */
1933 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
/* Derive this ring's EOP interrupt source from its MEC ('me' is 1-based)
 * and pipe position among the per-pipe MEC1_PIPE0.. sources. */
1935 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1936 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1939 /* type-2 packets are deprecated on MEC, use type-3 instead */
1940 r = amdgpu_ring_init(adev, ring, 1024,
1941 &adev->gfx.eop_irq, irq_type);
/*
 * gfx_v8_0_sw_init() - one-time software-side initialization of the GFX8
 * IP block (amd_ip_funcs .sw_init callback).
 *
 * Sets up, in order: per-ASIC MEC topology, interrupt sources, firmware,
 * RLC and MEC buffer objects, the gfx ring, all enabled compute rings,
 * the KIQ and its ring, per-queue MQDs, and the GDS/GWS/OA carve-outs.
 * @handle is the amdgpu_device pointer cast through void *.
 * Returns 0 on success or a negative errno (error paths elided here
 * bail out on each failed step).
 */
1949 static int gfx_v8_0_sw_init(void *handle)
1951 int i, j, k, r, ring_id;
1952 struct amdgpu_ring *ring;
1953 struct amdgpu_kiq *kiq;
1954 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* Pick the compute micro-engine (MEC) count for this ASIC: the chips
 * listed in these cases get two MECs, the default path a single one. */
1956 switch (adev->asic_type) {
1959 case CHIP_POLARIS11:
1960 case CHIP_POLARIS12:
1961 case CHIP_POLARIS10:
1963 adev->gfx.mec.num_mec = 2;
1968 adev->gfx.mec.num_mec = 1;
/* All GFX8 parts share 4 pipes per MEC and 8 hardware queues per pipe. */
1972 adev->gfx.mec.num_pipe_per_mec = 4;
1973 adev->gfx.mec.num_queue_per_pipe = 8;
/* KIQ interrupt (legacy IH client, source id 178). */
1976 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
/* End-of-pipe interrupt shared by gfx and compute rings (source id 181). */
1981 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
1985 /* Privileged reg */
1986 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
1987 &adev->gfx.priv_reg_irq);
1991 /* Privileged inst */
1992 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
1993 &adev->gfx.priv_inst_irq);
1997 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1999 gfx_v8_0_scratch_init(adev);
/* Fetch and validate the CP/RLC/MEC firmware images. */
2001 r = gfx_v8_0_init_microcode(adev);
2003 DRM_ERROR("Failed to load gfx firmware!\n");
2007 r = gfx_v8_0_rlc_init(adev);
2009 DRM_ERROR("Failed to init rlc BOs!\n");
2013 r = gfx_v8_0_mec_init(adev);
2015 DRM_ERROR("Failed to init MEC BOs!\n");
2019 /* set up the gfx ring */
2020 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2021 ring = &adev->gfx.gfx_ring[i];
2022 ring->ring_obj = NULL;
2023 sprintf(ring->name, "gfx");
2024 /* no gfx doorbells on iceland */
2025 if (adev->asic_type != CHIP_TOPAZ) {
2026 ring->use_doorbell = true;
2027 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2030 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2031 AMDGPU_CP_IRQ_GFX_EOP);
2037 /* set up the compute queues - allocate horizontally across pipes */
2039 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2040 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2041 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
/* Skip queues that are disabled/reserved in this configuration. */
2042 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2045 r = gfx_v8_0_compute_ring_init(adev,
/* Kernel interface queue: uses the same per-ring HPD/EOP slice size
 * as the regular compute rings. */
2056 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2058 DRM_ERROR("Failed to init KIQ BOs!\n");
2062 kiq = &adev->gfx.kiq;
2063 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2067 /* create MQD for all compute queues as well as KIQ for SRIOV case */
2068 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2072 /* reserve GDS, GWS and OA resource for gfx */
2073 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2074 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2075 &adev->gds.gds_gfx_bo, NULL, NULL);
2079 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2080 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2081 &adev->gds.gws_gfx_bo, NULL, NULL);
2085 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2086 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2087 &adev->gds.oa_gfx_bo, NULL, NULL);
/* 32 KiB of constant-engine (CE) RAM. */
2091 adev->gfx.ce_ram_size = 0x8000;
2093 r = gfx_v8_0_gpu_early_init(adev);
/*
 * gfx_v8_0_sw_fini() - tear down everything gfx_v8_0_sw_init() created
 * (amd_ip_funcs .sw_fini callback), in roughly reverse order: the
 * GDS/GWS/OA BOs, the gfx and compute rings, the MQDs, the KIQ, the
 * SRIOV CSA object, the MEC/RLC buffers and finally the cached firmware.
 * Always returns success.
 */
2100 static int gfx_v8_0_sw_fini(void *handle)
2103 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* Release the OA/GWS/GDS carve-out BOs reserved in sw_init. */
2105 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2106 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2107 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2109 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2110 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2111 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2112 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2114 amdgpu_gfx_compute_mqd_sw_fini(adev);
2115 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2116 amdgpu_gfx_kiq_fini(adev);
/* SRIOV clear-state-area object (no-op free if it was never created). */
2117 amdgpu_bo_free_kernel(&adev->virt.csa_obj, &adev->virt.csa_vmid0_addr, NULL);
2119 gfx_v8_0_mec_fini(adev);
2120 gfx_v8_0_rlc_fini(adev);
2121 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2122 &adev->gfx.rlc.clear_state_gpu_addr,
2123 (void **)&adev->gfx.rlc.cs_ptr);
/* Carrizo/Stoney additionally carry an RLC cp_table BO — presumably
 * allocated during RLC init for these APUs only; freed here. */
2124 if ((adev->asic_type == CHIP_CARRIZO) ||
2125 (adev->asic_type == CHIP_STONEY)) {
2126 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2127 &adev->gfx.rlc.cp_table_gpu_addr,
2128 (void **)&adev->gfx.rlc.cp_table_ptr);
2130 gfx_v8_0_free_microcode(adev);
2135 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2137 uint32_t *modearray, *mod2array;
2138 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2139 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2142 modearray = adev->gfx.config.tile_mode_array;
2143 mod2array = adev->gfx.config.macrotile_mode_array;
2145 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2146 modearray[reg_offset] = 0;
2148 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2149 mod2array[reg_offset] = 0;
2151 switch (adev->asic_type) {
2153 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2154 PIPE_CONFIG(ADDR_SURF_P2) |
2155 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2156 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2157 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2158 PIPE_CONFIG(ADDR_SURF_P2) |
2159 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2160 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2161 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2162 PIPE_CONFIG(ADDR_SURF_P2) |
2163 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2164 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2165 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2166 PIPE_CONFIG(ADDR_SURF_P2) |
2167 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2168 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2169 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2170 PIPE_CONFIG(ADDR_SURF_P2) |
2171 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2172 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2173 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2174 PIPE_CONFIG(ADDR_SURF_P2) |
2175 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2176 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2177 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2178 PIPE_CONFIG(ADDR_SURF_P2) |
2179 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2180 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2181 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2182 PIPE_CONFIG(ADDR_SURF_P2));
2183 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2184 PIPE_CONFIG(ADDR_SURF_P2) |
2185 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2186 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2187 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2188 PIPE_CONFIG(ADDR_SURF_P2) |
2189 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2190 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2191 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2192 PIPE_CONFIG(ADDR_SURF_P2) |
2193 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2194 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2195 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2196 PIPE_CONFIG(ADDR_SURF_P2) |
2197 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2198 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2199 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2200 PIPE_CONFIG(ADDR_SURF_P2) |
2201 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2202 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2203 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2204 PIPE_CONFIG(ADDR_SURF_P2) |
2205 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2206 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2207 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2208 PIPE_CONFIG(ADDR_SURF_P2) |
2209 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2210 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2211 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2212 PIPE_CONFIG(ADDR_SURF_P2) |
2213 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2214 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2215 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2216 PIPE_CONFIG(ADDR_SURF_P2) |
2217 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2218 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2219 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2220 PIPE_CONFIG(ADDR_SURF_P2) |
2221 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2222 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2223 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2224 PIPE_CONFIG(ADDR_SURF_P2) |
2225 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2226 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2227 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2228 PIPE_CONFIG(ADDR_SURF_P2) |
2229 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2231 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2232 PIPE_CONFIG(ADDR_SURF_P2) |
2233 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2234 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2235 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2236 PIPE_CONFIG(ADDR_SURF_P2) |
2237 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2238 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2239 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2240 PIPE_CONFIG(ADDR_SURF_P2) |
2241 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2242 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2243 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2244 PIPE_CONFIG(ADDR_SURF_P2) |
2245 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2246 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2247 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2248 PIPE_CONFIG(ADDR_SURF_P2) |
2249 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2250 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2251 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2252 PIPE_CONFIG(ADDR_SURF_P2) |
2253 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2254 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2256 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2257 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2258 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2259 NUM_BANKS(ADDR_SURF_8_BANK));
2260 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2261 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2262 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2263 NUM_BANKS(ADDR_SURF_8_BANK));
2264 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2265 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2266 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2267 NUM_BANKS(ADDR_SURF_8_BANK));
2268 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2269 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2270 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2271 NUM_BANKS(ADDR_SURF_8_BANK));
2272 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2273 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2274 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2275 NUM_BANKS(ADDR_SURF_8_BANK));
2276 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2277 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2278 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2279 NUM_BANKS(ADDR_SURF_8_BANK));
2280 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2281 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2282 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2283 NUM_BANKS(ADDR_SURF_8_BANK));
2284 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2285 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2286 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2287 NUM_BANKS(ADDR_SURF_16_BANK));
2288 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2289 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2290 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2291 NUM_BANKS(ADDR_SURF_16_BANK));
2292 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2293 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2294 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2295 NUM_BANKS(ADDR_SURF_16_BANK));
2296 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2297 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2298 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2299 NUM_BANKS(ADDR_SURF_16_BANK));
2300 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2301 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2302 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2303 NUM_BANKS(ADDR_SURF_16_BANK));
2304 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2305 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2306 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2307 NUM_BANKS(ADDR_SURF_16_BANK));
2308 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2309 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2310 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2311 NUM_BANKS(ADDR_SURF_8_BANK));
2313 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2314 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2316 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2318 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2319 if (reg_offset != 7)
2320 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2324 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2325 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2326 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2327 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2328 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2329 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2331 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2332 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2333 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2335 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2336 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2338 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2339 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2340 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2341 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2343 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2344 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2345 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2347 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2348 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2349 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2351 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2352 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2353 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2354 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2355 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2356 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2357 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2358 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2359 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2361 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2362 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2363 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2365 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2366 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2367 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2369 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2370 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2371 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2372 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2373 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2374 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2375 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2377 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2378 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2381 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2382 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2383 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2385 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2386 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2387 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2389 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2390 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2391 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2392 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2393 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2394 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2395 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2397 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2398 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2399 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2401 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2402 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2403 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2405 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2406 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2407 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2409 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2410 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2411 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2413 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2414 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2415 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2416 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2417 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2418 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2419 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2421 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2422 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2423 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2425 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2426 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2427 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2429 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2430 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2431 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2433 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2435 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2436 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2437 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2439 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2440 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2441 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2442 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2443 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2444 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2445 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2447 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2448 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2449 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2450 NUM_BANKS(ADDR_SURF_8_BANK));
2451 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2452 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2453 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2454 NUM_BANKS(ADDR_SURF_8_BANK));
2455 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2456 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2457 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2458 NUM_BANKS(ADDR_SURF_8_BANK));
2459 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2460 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2461 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2462 NUM_BANKS(ADDR_SURF_8_BANK));
2463 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2464 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2465 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2466 NUM_BANKS(ADDR_SURF_8_BANK));
2467 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2469 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2470 NUM_BANKS(ADDR_SURF_8_BANK));
2471 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2472 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2473 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2474 NUM_BANKS(ADDR_SURF_8_BANK));
2475 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2476 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2477 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2478 NUM_BANKS(ADDR_SURF_8_BANK));
2479 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2481 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2482 NUM_BANKS(ADDR_SURF_8_BANK));
2483 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2484 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2485 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2486 NUM_BANKS(ADDR_SURF_8_BANK));
2487 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2489 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2490 NUM_BANKS(ADDR_SURF_8_BANK));
2491 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2492 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2493 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2494 NUM_BANKS(ADDR_SURF_8_BANK));
2495 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2496 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2497 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2498 NUM_BANKS(ADDR_SURF_8_BANK));
2499 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2500 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2501 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2502 NUM_BANKS(ADDR_SURF_4_BANK));
2504 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2505 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2507 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2508 if (reg_offset != 7)
2509 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2513 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2516 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2517 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2520 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2521 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2523 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2524 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2525 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2527 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2528 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2529 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2532 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2533 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2536 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2537 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2538 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2540 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2541 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2542 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2543 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2544 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2545 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2546 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2547 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2548 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2549 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2550 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2551 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2553 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2554 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2555 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2556 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2557 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2558 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2559 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2560 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2561 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2562 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2563 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2564 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2566 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2567 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2568 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2570 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2571 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2572 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2573 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2574 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2575 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2576 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2578 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2579 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2580 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2581 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2582 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2583 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2584 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2585 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2586 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2587 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2588 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2590 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2591 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2592 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2594 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2595 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2596 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2598 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2599 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2600 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2601 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2602 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2603 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2604 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2605 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2606 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2607 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2608 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2609 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2610 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2611 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2612 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2613 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2614 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2615 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2616 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2617 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2618 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2619 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2620 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2621 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2622 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2623 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2624 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2625 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2626 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2627 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2628 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2630 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2631 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2632 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2633 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2634 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2636 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2638 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2639 NUM_BANKS(ADDR_SURF_16_BANK));
2640 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2642 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2643 NUM_BANKS(ADDR_SURF_16_BANK));
2644 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2646 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2647 NUM_BANKS(ADDR_SURF_16_BANK));
2648 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2649 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2650 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2651 NUM_BANKS(ADDR_SURF_16_BANK));
2652 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2653 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2654 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2655 NUM_BANKS(ADDR_SURF_16_BANK));
2656 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2657 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2658 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2659 NUM_BANKS(ADDR_SURF_16_BANK));
2660 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2662 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2663 NUM_BANKS(ADDR_SURF_16_BANK));
2664 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2665 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2666 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2667 NUM_BANKS(ADDR_SURF_16_BANK));
2668 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2669 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2670 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2671 NUM_BANKS(ADDR_SURF_16_BANK));
2672 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2673 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2674 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2675 NUM_BANKS(ADDR_SURF_16_BANK));
2676 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2677 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2678 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2679 NUM_BANKS(ADDR_SURF_16_BANK));
2680 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2681 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2682 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2683 NUM_BANKS(ADDR_SURF_8_BANK));
2684 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2685 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2686 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2687 NUM_BANKS(ADDR_SURF_4_BANK));
2688 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2689 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2690 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2691 NUM_BANKS(ADDR_SURF_4_BANK));
2693 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2694 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2696 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2697 if (reg_offset != 7)
2698 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2701 case CHIP_POLARIS11:
2702 case CHIP_POLARIS12:
2703 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2704 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2706 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2707 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2710 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2711 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2712 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2714 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2715 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2716 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2717 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2718 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2719 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2720 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2722 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2723 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2724 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2726 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2727 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2730 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2731 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2732 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2734 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2735 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2736 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2737 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2738 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2739 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2740 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2741 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2743 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2744 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2745 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2746 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2747 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2748 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2749 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2750 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2751 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2752 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2753 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2754 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2755 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2756 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2757 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2758 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2759 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2760 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2761 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2762 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2763 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2764 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2765 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2766 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2767 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2768 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2769 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2770 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2771 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2772 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2773 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2774 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2775 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2776 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2777 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2778 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2779 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2780 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2781 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2782 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2783 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2784 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2785 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2786 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2787 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2788 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2789 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2790 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2791 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2792 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2793 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2794 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2795 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2796 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2797 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2798 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2799 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2800 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2801 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2802 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2803 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2804 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2805 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2806 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2807 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2808 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2809 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2810 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2811 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2812 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2813 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2814 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2815 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2816 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2817 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2818 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2819 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2820 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2821 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2822 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2823 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2824 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2826 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2827 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2828 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2829 NUM_BANKS(ADDR_SURF_16_BANK));
2831 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2832 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2833 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2834 NUM_BANKS(ADDR_SURF_16_BANK));
2836 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2838 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839 NUM_BANKS(ADDR_SURF_16_BANK));
2841 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2843 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2844 NUM_BANKS(ADDR_SURF_16_BANK));
2846 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2847 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2848 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2849 NUM_BANKS(ADDR_SURF_16_BANK));
2851 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2852 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2853 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2854 NUM_BANKS(ADDR_SURF_16_BANK));
2856 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2859 NUM_BANKS(ADDR_SURF_16_BANK));
2861 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2862 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2863 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2864 NUM_BANKS(ADDR_SURF_16_BANK));
2866 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2867 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2868 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2869 NUM_BANKS(ADDR_SURF_16_BANK));
2871 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2872 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2873 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2874 NUM_BANKS(ADDR_SURF_16_BANK));
2876 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2878 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2879 NUM_BANKS(ADDR_SURF_16_BANK));
2881 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2882 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2883 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2884 NUM_BANKS(ADDR_SURF_16_BANK));
2886 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2887 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2888 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2889 NUM_BANKS(ADDR_SURF_8_BANK));
2891 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2892 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2893 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2894 NUM_BANKS(ADDR_SURF_4_BANK));
2896 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2897 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2899 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2900 if (reg_offset != 7)
2901 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2904 case CHIP_POLARIS10:
2905 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2908 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2909 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2911 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2912 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2913 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2916 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2917 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2919 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2920 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2921 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2922 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2924 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2925 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2926 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2928 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2929 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2932 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2933 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2934 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2935 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2936 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2937 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2938 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2939 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2940 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2941 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2943 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2944 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2945 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2946 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2947 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2948 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2949 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2950 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2951 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2953 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2954 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2955 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2956 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2957 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2958 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2960 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2961 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2962 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2963 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2964 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2965 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2966 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2967 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2968 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2969 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2970 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2971 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2972 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2973 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2974 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2975 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2976 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2977 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2978 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2979 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2980 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2981 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2982 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2983 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2984 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2985 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2986 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2987 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2988 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2989 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2990 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2991 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2992 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2993 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2994 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2995 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2996 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2997 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2998 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2999 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3000 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3001 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3002 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3003 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3004 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3005 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3006 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3007 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3008 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3009 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3010 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3011 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3012 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3013 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3014 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3015 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3016 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3017 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3018 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3019 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3020 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3021 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3022 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3023 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3024 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3025 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3026 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3028 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3030 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031 NUM_BANKS(ADDR_SURF_16_BANK));
3033 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3035 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3036 NUM_BANKS(ADDR_SURF_16_BANK));
3038 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3039 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3040 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3041 NUM_BANKS(ADDR_SURF_16_BANK));
3043 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3044 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3045 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3046 NUM_BANKS(ADDR_SURF_16_BANK));
3048 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3049 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3050 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3051 NUM_BANKS(ADDR_SURF_16_BANK));
3053 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3054 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3055 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3056 NUM_BANKS(ADDR_SURF_16_BANK));
3058 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3059 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3060 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3061 NUM_BANKS(ADDR_SURF_16_BANK));
3063 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3064 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3065 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3066 NUM_BANKS(ADDR_SURF_16_BANK));
3068 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3069 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3070 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3071 NUM_BANKS(ADDR_SURF_16_BANK));
3073 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3074 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3075 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3076 NUM_BANKS(ADDR_SURF_16_BANK));
3078 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3079 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3080 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3081 NUM_BANKS(ADDR_SURF_16_BANK));
3083 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3084 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3085 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3086 NUM_BANKS(ADDR_SURF_8_BANK));
3088 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3089 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3090 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3091 NUM_BANKS(ADDR_SURF_4_BANK));
3093 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3095 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3096 NUM_BANKS(ADDR_SURF_4_BANK));
3098 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3099 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3101 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3102 if (reg_offset != 7)
3103 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3107 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3108 PIPE_CONFIG(ADDR_SURF_P2) |
3109 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3110 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3111 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3112 PIPE_CONFIG(ADDR_SURF_P2) |
3113 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3114 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3115 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3116 PIPE_CONFIG(ADDR_SURF_P2) |
3117 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3118 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3119 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3120 PIPE_CONFIG(ADDR_SURF_P2) |
3121 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3122 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3123 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3124 PIPE_CONFIG(ADDR_SURF_P2) |
3125 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3126 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3127 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3128 PIPE_CONFIG(ADDR_SURF_P2) |
3129 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3130 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3131 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3132 PIPE_CONFIG(ADDR_SURF_P2) |
3133 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3134 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3135 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3136 PIPE_CONFIG(ADDR_SURF_P2));
3137 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3138 PIPE_CONFIG(ADDR_SURF_P2) |
3139 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3140 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3141 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3142 PIPE_CONFIG(ADDR_SURF_P2) |
3143 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3144 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3145 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3146 PIPE_CONFIG(ADDR_SURF_P2) |
3147 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3148 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3149 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3150 PIPE_CONFIG(ADDR_SURF_P2) |
3151 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3152 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3153 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3154 PIPE_CONFIG(ADDR_SURF_P2) |
3155 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3156 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3157 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3158 PIPE_CONFIG(ADDR_SURF_P2) |
3159 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3160 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3161 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3162 PIPE_CONFIG(ADDR_SURF_P2) |
3163 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3164 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3165 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3166 PIPE_CONFIG(ADDR_SURF_P2) |
3167 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3168 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3169 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3170 PIPE_CONFIG(ADDR_SURF_P2) |
3171 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3172 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3173 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3174 PIPE_CONFIG(ADDR_SURF_P2) |
3175 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3176 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3177 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3178 PIPE_CONFIG(ADDR_SURF_P2) |
3179 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3180 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3181 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3182 PIPE_CONFIG(ADDR_SURF_P2) |
3183 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3184 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3185 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3186 PIPE_CONFIG(ADDR_SURF_P2) |
3187 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3188 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3189 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3190 PIPE_CONFIG(ADDR_SURF_P2) |
3191 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3192 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3193 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3194 PIPE_CONFIG(ADDR_SURF_P2) |
3195 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3196 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3197 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3198 PIPE_CONFIG(ADDR_SURF_P2) |
3199 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3200 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3201 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3202 PIPE_CONFIG(ADDR_SURF_P2) |
3203 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3204 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3205 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3206 PIPE_CONFIG(ADDR_SURF_P2) |
3207 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3208 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3210 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3211 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3212 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3213 NUM_BANKS(ADDR_SURF_8_BANK));
3214 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3215 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3216 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3217 NUM_BANKS(ADDR_SURF_8_BANK));
3218 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3219 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3220 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3221 NUM_BANKS(ADDR_SURF_8_BANK));
3222 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3223 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3224 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3225 NUM_BANKS(ADDR_SURF_8_BANK));
3226 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3227 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3228 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3229 NUM_BANKS(ADDR_SURF_8_BANK));
3230 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3231 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3232 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3233 NUM_BANKS(ADDR_SURF_8_BANK));
3234 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3235 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3236 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3237 NUM_BANKS(ADDR_SURF_8_BANK));
3238 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3239 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3240 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3241 NUM_BANKS(ADDR_SURF_16_BANK));
3242 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3243 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3244 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3245 NUM_BANKS(ADDR_SURF_16_BANK));
3246 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3247 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3248 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3249 NUM_BANKS(ADDR_SURF_16_BANK));
3250 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3251 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3252 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3253 NUM_BANKS(ADDR_SURF_16_BANK));
3254 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3255 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3256 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3257 NUM_BANKS(ADDR_SURF_16_BANK));
3258 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3259 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3260 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3261 NUM_BANKS(ADDR_SURF_16_BANK));
3262 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3263 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3264 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3265 NUM_BANKS(ADDR_SURF_8_BANK));
3267 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3268 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3270 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3272 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3273 if (reg_offset != 7)
3274 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3279 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3283 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3284 PIPE_CONFIG(ADDR_SURF_P2) |
3285 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3286 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3287 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3288 PIPE_CONFIG(ADDR_SURF_P2) |
3289 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3290 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3291 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3292 PIPE_CONFIG(ADDR_SURF_P2) |
3293 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3294 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3295 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3296 PIPE_CONFIG(ADDR_SURF_P2) |
3297 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3298 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3299 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3300 PIPE_CONFIG(ADDR_SURF_P2) |
3301 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3302 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3303 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3304 PIPE_CONFIG(ADDR_SURF_P2) |
3305 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3306 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3307 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3308 PIPE_CONFIG(ADDR_SURF_P2) |
3309 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3310 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3311 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3312 PIPE_CONFIG(ADDR_SURF_P2));
3313 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3314 PIPE_CONFIG(ADDR_SURF_P2) |
3315 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3316 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3317 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3318 PIPE_CONFIG(ADDR_SURF_P2) |
3319 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3320 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3321 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3322 PIPE_CONFIG(ADDR_SURF_P2) |
3323 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3324 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3325 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3326 PIPE_CONFIG(ADDR_SURF_P2) |
3327 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3328 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3329 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3330 PIPE_CONFIG(ADDR_SURF_P2) |
3331 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3332 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3333 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3334 PIPE_CONFIG(ADDR_SURF_P2) |
3335 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3336 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3337 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3338 PIPE_CONFIG(ADDR_SURF_P2) |
3339 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3340 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3341 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3342 PIPE_CONFIG(ADDR_SURF_P2) |
3343 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3344 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3345 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3346 PIPE_CONFIG(ADDR_SURF_P2) |
3347 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3348 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3349 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3350 PIPE_CONFIG(ADDR_SURF_P2) |
3351 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3352 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3353 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3354 PIPE_CONFIG(ADDR_SURF_P2) |
3355 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3356 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3357 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3358 PIPE_CONFIG(ADDR_SURF_P2) |
3359 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3361 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3362 PIPE_CONFIG(ADDR_SURF_P2) |
3363 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3364 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3365 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3366 PIPE_CONFIG(ADDR_SURF_P2) |
3367 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3368 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3369 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3370 PIPE_CONFIG(ADDR_SURF_P2) |
3371 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3372 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3373 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3374 PIPE_CONFIG(ADDR_SURF_P2) |
3375 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3376 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3377 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3378 PIPE_CONFIG(ADDR_SURF_P2) |
3379 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3380 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3381 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3382 PIPE_CONFIG(ADDR_SURF_P2) |
3383 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3384 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3386 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3387 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3388 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3389 NUM_BANKS(ADDR_SURF_8_BANK));
3390 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3391 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3392 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3393 NUM_BANKS(ADDR_SURF_8_BANK));
3394 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3395 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3396 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3397 NUM_BANKS(ADDR_SURF_8_BANK));
3398 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3399 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3400 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3401 NUM_BANKS(ADDR_SURF_8_BANK));
3402 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3403 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3404 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3405 NUM_BANKS(ADDR_SURF_8_BANK));
3406 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3407 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3408 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3409 NUM_BANKS(ADDR_SURF_8_BANK));
3410 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3411 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3412 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3413 NUM_BANKS(ADDR_SURF_8_BANK));
3414 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3415 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3416 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3417 NUM_BANKS(ADDR_SURF_16_BANK));
3418 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3419 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3420 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3421 NUM_BANKS(ADDR_SURF_16_BANK));
3422 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3423 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3424 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3425 NUM_BANKS(ADDR_SURF_16_BANK));
3426 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3427 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3428 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3429 NUM_BANKS(ADDR_SURF_16_BANK));
3430 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3431 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3432 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3433 NUM_BANKS(ADDR_SURF_16_BANK));
3434 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3435 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3436 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3437 NUM_BANKS(ADDR_SURF_16_BANK));
3438 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3439 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3440 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3441 NUM_BANKS(ADDR_SURF_8_BANK));
3443 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3444 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3446 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3448 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3449 if (reg_offset != 7)
3450 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
/*
 * Program GRBM_GFX_INDEX so subsequent register accesses target a specific
 * shader engine / shader array / instance.  Passing 0xffffffff for a field
 * selects broadcast writes for that dimension instead of a single index.
 * Caller must hold adev->grbm_idx_mutex (see callers below).
 * NOTE(review): the else-arms pairing each broadcast test with its indexed
 * REG_SET_FIELD are not visible in this chunk — confirm against full file.
 */
3456 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3457 u32 se_num, u32 sh_num, u32 instance)
/* instance: broadcast to all instances, or select one */
3461 if (instance == 0xffffffff)
3462 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3464 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
/* se_num: broadcast to all shader engines, or select one */
3466 if (se_num == 0xffffffff)
3467 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3469 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
/* sh_num: broadcast to all shader arrays, or select one */
3471 if (sh_num == 0xffffffff)
3472 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3474 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
/* latch the routing selection */
3476 WREG32(mmGRBM_GFX_INDEX, data);
/*
 * Return a bitmap of the render backends (RBs) that are active for the
 * currently selected SE/SH (selection done by the caller via
 * gfx_v8_0_select_se_sh()).  A set bit means the RB is enabled.
 */
3479 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
/* OR hw-fused and user-requested disable masks together */
3483 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3484 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
/* both registers share the BACKEND_DISABLE field layout */
3486 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
/* mask of all possible RBs per shader array */
3488 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3489 adev->gfx.config.max_sh_per_se);
/* invert: disabled bits -> enabled bitmap, clipped to valid RBs */
3491 return (~data) & mask;
/*
 * Fill in the golden PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 values for
 * the current ASIC.  Values are OR-ed into *rconf / *rconf1, so callers are
 * expected to pass zero-initialized words.  Unknown ASICs only log an error.
 * NOTE(review): several case labels and break statements are not visible in
 * this chunk — confirm fallthrough behavior against the full file.
 */
3495 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3497 switch (adev->asic_type) {
3499 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3500 RB_XSEL2(1) | PKR_MAP(2) |
3501 PKR_XSEL(1) | PKR_YSEL(1) |
3502 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3503 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3507 case CHIP_POLARIS10:
3508 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3509 SE_XSEL(1) | SE_YSEL(1);
3510 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3515 *rconf |= RB_MAP_PKR0(2);
3518 case CHIP_POLARIS11:
3519 case CHIP_POLARIS12:
3520 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3521 SE_XSEL(1) | SE_YSEL(1);
/* default: unhandled ASIC — leave rconf untouched and complain */
3529 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
/*
 * Rewrite the golden raster configuration for chips with harvested
 * (fused-off) render backends.  For each shader engine the SE/PKR/RB mapping
 * fields are patched so rasterization only targets RBs present in rb_mask,
 * then the per-SE value is written through GRBM_GFX_INDEX selection.
 * Mirrors the radeon/amdgpu harvesting algorithm shared across GFX6-8.
 *
 * @raster_config:   golden PA_SC_RASTER_CONFIG value
 * @raster_config_1: golden PA_SC_RASTER_CONFIG_1 value
 * @rb_mask:         bitmap of RBs that are actually present
 * @num_rb:          number of active RBs
 */
3535 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3536 u32 raster_config, u32 raster_config_1,
3537 unsigned rb_mask, unsigned num_rb)
/* clamp to at least one SH/SE so the divisions below are safe */
3539 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3540 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3541 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3542 unsigned rb_per_se = num_rb / num_se;
3543 unsigned se_mask[4];
/* slice rb_mask into one RB bitmap per shader engine */
3546 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3547 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3548 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3549 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
/* algorithm only supports these topologies */
3551 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3552 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3553 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
/* a whole SE pair harvested: repoint SE_PAIR_MAP at the live pair */
3555 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3556 (!se_mask[2] && !se_mask[3]))) {
3557 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3559 if (!se_mask[0] && !se_mask[1]) {
3561 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3564 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
/* patch and program the config for each shader engine */
3568 for (se = 0; se < num_se; se++) {
3569 unsigned raster_config_se = raster_config;
3570 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3571 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3572 int idx = (se / 2) * 2;
/* one SE of this pair harvested: repoint SE_MAP at the live one */
3574 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3575 raster_config_se &= ~SE_MAP_MASK;
3577 if (!se_mask[idx]) {
3578 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3580 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
/* one packer of this SE harvested: repoint PKR_MAP */
3584 pkr0_mask &= rb_mask;
3585 pkr1_mask &= rb_mask;
3586 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3587 raster_config_se &= ~PKR_MAP_MASK;
3590 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3592 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
/* within packer 0: repoint RB_MAP_PKR0 if one of its RBs is gone */
3596 if (rb_per_se >= 2) {
3597 unsigned rb0_mask = 1 << (se * rb_per_se);
3598 unsigned rb1_mask = rb0_mask << 1;
3600 rb0_mask &= rb_mask;
3601 rb1_mask &= rb_mask;
3602 if (!rb0_mask || !rb1_mask) {
3603 raster_config_se &= ~RB_MAP_PKR0_MASK;
3607 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3610 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
/* same for packer 1 when more than two RBs per SE exist */
3614 if (rb_per_se > 2) {
3615 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3616 rb1_mask = rb0_mask << 1;
3617 rb0_mask &= rb_mask;
3618 rb1_mask &= rb_mask;
3619 if (!rb0_mask || !rb1_mask) {
3620 raster_config_se &= ~RB_MAP_PKR1_MASK;
3624 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3627 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3633 /* GRBM_GFX_INDEX has a different offset on VI */
3634 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3635 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3636 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3639 /* GRBM_GFX_INDEX has a different offset on VI */
3640 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
/*
 * Discover which render backends are active, program the raster
 * configuration accordingly (golden values, or harvested variant if RBs are
 * fused off), and cache the per-SE/SH RB registers for userspace queries.
 * Takes grbm_idx_mutex around all GRBM_GFX_INDEX-routed accesses.
 */
3643 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3647 u32 raster_config = 0, raster_config_1 = 0;
3649 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3650 adev->gfx.config.max_sh_per_se;
3651 unsigned num_rb_pipes;
/* collect the active-RB bitmap across every SE/SH */
3653 mutex_lock(&adev->grbm_idx_mutex);
3654 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3655 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3656 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3657 data = gfx_v8_0_get_rb_active_bitmap(adev);
3658 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3659 rb_bitmap_width_per_sh);
3662 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3664 adev->gfx.config.backend_enable_mask = active_rbs;
3665 adev->gfx.config.num_rbs = hweight32(active_rbs);
3667 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3668 adev->gfx.config.max_shader_engines, 16);
3670 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
/* fully populated (or unknown) RB set: broadcast golden values;
 * otherwise recompute a harvested configuration per SE
 */
3672 if (!adev->gfx.config.backend_enable_mask ||
3673 adev->gfx.config.num_rbs >= num_rb_pipes) {
3674 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3675 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3677 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3678 adev->gfx.config.backend_enable_mask,
3682 /* cache the values for userspace */
3683 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3684 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3685 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3686 adev->gfx.config.rb_config[i][j].rb_backend_disable =
3687 RREG32(mmCC_RB_BACKEND_DISABLE);
3688 adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3689 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3690 adev->gfx.config.rb_config[i][j].raster_config =
3691 RREG32(mmPA_SC_RASTER_CONFIG);
3692 adev->gfx.config.rb_config[i][j].raster_config_1 =
3693 RREG32(mmPA_SC_RASTER_CONFIG_1);
3696 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3697 mutex_unlock(&adev->grbm_idx_mutex);
3701 * gfx_v8_0_init_compute_vmid - gart enable
3703 * @adev: amdgpu_device pointer
3705 * Initialize compute vmid sh_mem registers
3708 #define DEFAULT_SH_MEM_BASES (0x6000)
3709 #define FIRST_COMPUTE_VMID (8)
3710 #define LAST_COMPUTE_VMID (16)
/*
 * Initialize SH_MEM_* apertures for the compute VMIDs
 * (FIRST_COMPUTE_VMID..LAST_COMPUTE_VMID-1) used by KFD/HSA: 64-bit HSA
 * addressing, unaligned access allowed, cache-coherent default mtype,
 * private-ATC enabled.  Per-VMID register banks are selected via
 * vi_srbm_select() under srbm_mutex.
 */
3711 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3714 uint32_t sh_mem_config;
3715 uint32_t sh_mem_bases;
3718 * Configure apertures:
3719 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3720 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3721 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3723 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3725 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3726 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3727 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3728 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3729 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3730 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3732 mutex_lock(&adev->srbm_mutex);
3733 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3734 vi_srbm_select(adev, 0, 0, 0, i);
3735 /* CP and shaders */
3736 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
/* APE1 base > limit disables the APE1 aperture */
3737 WREG32(mmSH_MEM_APE1_BASE, 1);
3738 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3739 WREG32(mmSH_MEM_BASES, sh_mem_bases);
/* restore VMID 0 register bank selection */
3741 vi_srbm_select(adev, 0, 0, 0, 0);
3742 mutex_unlock(&adev->srbm_mutex);
/*
 * Per-ASIC gfx config defaults.  Enables double off-chip LDS buffers on the
 * ASICs in the first case group, disables them otherwise.
 * NOTE(review): the case labels themselves are not visible in this chunk —
 * confirm which chips fall into each arm against the full file.
 */
3745 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3747 switch (adev->asic_type) {
3749 adev->gfx.config.double_offchip_lds_buf = 1;
3753 adev->gfx.config.double_offchip_lds_buf = 0;
/*
 * One-time GFX8 engine initialization: address config, tiling tables, RB
 * setup, CU info, per-VMID SH_MEM apertures (VMID 0 gets GPUVM/MTYPE_UC,
 * higher VMIDs get the shared-aperture base), FIFO sizes and SPI arbiter
 * priorities.
 */
3758 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3760 u32 tmp, sh_static_mem_cfg;
3763 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3764 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3765 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3766 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3768 gfx_v8_0_tiling_mode_table_init(adev);
3769 gfx_v8_0_setup_rb(adev);
3770 gfx_v8_0_get_cu_info(adev);
3771 gfx_v8_0_config_init(adev);
3773 /* XXX SH_MEM regs */
3774 /* where to put LDS, scratch, GPUVM in FSA64 space */
3775 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3777 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3779 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3781 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
/* program SH_MEM_* for every VMID under srbm_mutex */
3783 mutex_lock(&adev->srbm_mutex);
3784 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3785 vi_srbm_select(adev, 0, 0, 0, i);
3786 /* CP and shaders */
/* NOTE(review): branch selecting VMID 0 vs others is not visible here;
 * presumably the first arm is VMID 0 (UC default mtype, bases = 0) and
 * the second covers the remaining VMIDs — confirm against full file.
 */
3788 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3789 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3790 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3791 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3792 WREG32(mmSH_MEM_CONFIG, tmp);
3793 WREG32(mmSH_MEM_BASES, 0);
3795 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3796 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3797 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3798 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3799 WREG32(mmSH_MEM_CONFIG, tmp);
3800 tmp = adev->mc.shared_aperture_start >> 48;
3801 WREG32(mmSH_MEM_BASES, tmp);
/* APE1 base > limit disables the APE1 aperture */
3804 WREG32(mmSH_MEM_APE1_BASE, 1);
3805 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3807 vi_srbm_select(adev, 0, 0, 0, 0);
3808 mutex_unlock(&adev->srbm_mutex);
3810 gfx_v8_0_init_compute_vmid(adev);
3812 mutex_lock(&adev->grbm_idx_mutex);
3814 * making sure that the following register writes will be broadcasted
3815 * to all the shaders
3817 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3819 WREG32(mmPA_SC_FIFO_SIZE,
3820 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3821 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3822 (adev->gfx.config.sc_prim_fifo_size_backend <<
3823 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3824 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3825 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3826 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3827 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
/* give all four pipe timestamp queues equal arbitration priority */
3829 tmp = RREG32(mmSPI_ARB_PRIORITY);
3830 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3831 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3832 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3833 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3834 WREG32(mmSPI_ARB_PRIORITY, tmp);
3836 mutex_unlock(&adev->grbm_idx_mutex);
/*
 * Poll until the RLC serdes masters report idle: first the per-CU master
 * busy bit for every SE/SH (bounded by adev->usec_timeout), then the
 * non-CU masters (SE/GC/TC0/TC1).
 */
3840 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3845 mutex_lock(&adev->grbm_idx_mutex);
3846 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3847 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3848 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3849 for (k = 0; k < adev->usec_timeout; k++) {
3850 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3856 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3857 mutex_unlock(&adev->grbm_idx_mutex);
/* now wait for the non-CU serdes masters to go idle */
3859 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3860 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3861 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3862 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3863 for (k = 0; k < adev->usec_timeout; k++) {
3864 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
/*
 * Enable or disable the GUI-idle related interrupt sources on the gfx
 * ring 0 interrupt control register (context busy/empty, CMP busy,
 * GFX idle).
 */
3870 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3873 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3875 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3876 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3877 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3878 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3880 WREG32(mmCP_INT_CNTL_RING0, tmp);
/*
 * Tell the RLC where the clear-state buffer (CSB) lives: program its
 * 64-bit GPU address (low bits 4-byte aligned) and size.
 */
3883 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3886 WREG32(mmRLC_CSIB_ADDR_HI,
3887 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3888 WREG32(mmRLC_CSIB_ADDR_LO,
3889 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3890 WREG32(mmRLC_CSIB_LENGTH,
3891 adev->gfx.rlc.clear_state_size);
/*
 * Walk the RLC register-list-format blob and
 *  - record the start offset of each indirect entry in ind_start_offsets[],
 *  - collect the distinct register indices into unique_indices[],
 *  - rewrite each entry in register_list_format[] to reference its
 *    position in unique_indices[] (in-place modification).
 * 0xFFFFFFFF acts as an entry terminator in the list.  BUG_ON guards the
 * fixed-size output arrays (max_offset / max_indices).
 * NOTE(review): loop bodies are partially hidden in this chunk — confirm
 * the new_entry handling against the full file.
 */
3894 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3897 int *unique_indices,
3900 int *ind_start_offsets,
3905 bool new_entry = true;
3907 for (; ind_offset < list_size; ind_offset++) {
/* starting a new entry: remember where it begins */
3911 ind_start_offsets[*offset_count] = ind_offset;
3912 *offset_count = *offset_count + 1;
3913 BUG_ON(*offset_count >= max_offset);
/* terminator: next iteration starts a fresh entry */
3916 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3923 /* look for the matching indice */
3925 indices < *indices_count;
3927 if (unique_indices[indices] ==
3928 register_list_format[ind_offset])
/* not seen before: append to the unique list */
3932 if (indices >= *indices_count) {
3933 unique_indices[*indices_count] =
3934 register_list_format[ind_offset];
3935 indices = *indices_count;
3936 *indices_count = *indices_count + 1;
3937 BUG_ON(*indices_count >= max_indices);
/* replace the raw value with its unique-table index */
3940 register_list_format[ind_offset] = indices;
/*
 * Upload the RLC save/restore lists used for power gating:
 * restore values go to SRM ARAM, the (index-rewritten) format list and the
 * list size go to GPM scratch, indirect entry start offsets follow, and
 * each unique register index is split across the SRM index-control
 * ADDR/DATA register pairs.  Returns 0 on success, -ENOMEM if the
 * temporary format copy cannot be allocated.
 */
3944 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3947 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3948 int indices_count = 0;
3949 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3950 int offset_count = 0;
/* work on a scratch copy — parsing rewrites the list in place */
3953 unsigned int *register_list_format =
3954 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3955 if (!register_list_format)
3957 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3958 adev->gfx.rlc.reg_list_format_size_bytes);
3960 gfx_v8_0_parse_ind_reg_list(register_list_format,
3961 RLC_FormatDirectRegListLength,
3962 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3965 ARRAY_SIZE(unique_indices),
3966 indirect_start_offsets,
3968 ARRAY_SIZE(indirect_start_offsets));
3970 /* save and restore list */
3971 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3973 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3974 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3975 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
/* upload the rewritten format list to GPM scratch */
3978 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3979 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3980 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
/* list size is stored in dwords-per-pair (hence the extra >> 1) */
3982 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3983 list_size = list_size >> 1;
3984 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3985 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3987 /* starting offsets starts */
3988 WREG32(mmRLC_GPM_SCRATCH_ADDR,
3989 adev->gfx.rlc.starting_offsets_start);
3990 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3991 WREG32(mmRLC_GPM_SCRATCH_DATA,
3992 indirect_start_offsets[i]);
3994 /* unique indices */
3995 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3996 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3997 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3998 if (unique_indices[i] != 0) {
/* low 18 bits -> ADDR register, remaining bits -> DATA register */
3999 WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4000 WREG32(data + i, unique_indices[i] >> 20);
4003 kfree(register_list_format);
/* Turn on the RLC save/restore machine (used by power gating). */
4008 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4010 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
/*
 * Program the RLC power-gating timing parameters: WPTR poll idle count,
 * the four RLC_PG_DELAY sub-delays, the serdes command delay and the
 * GRBM register-save idle threshold for auto power gating.
 */
4013 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4017 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4019 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4020 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4021 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4022 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4023 WREG32(mmRLC_PG_DELAY, data);
4025 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4026 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
/* Toggle SMU clock slow-down during power-up (Carrizo-family PG knob). */
4030 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4033 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
/* Toggle SMU clock slow-down during power-down (Carrizo-family PG knob). */
4036 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4039 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
/* Enable/disable CP power gating; note the field is a *disable* bit,
 * hence the inverted value.
 */
4042 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4044 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
/*
 * Power-gating init per ASIC family.  Carrizo/Stoney additionally program
 * the RLC jump table and always-on CU mask; Polaris11/12 only need the
 * CSB, save/restore lists and PG timing.  Other ASICs get no PG init.
 */
4047 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4049 if ((adev->asic_type == CHIP_CARRIZO) ||
4050 (adev->asic_type == CHIP_STONEY)) {
4051 gfx_v8_0_init_csb(adev);
4052 gfx_v8_0_init_save_restore_list(adev);
4053 gfx_v8_0_enable_save_restore_machine(adev);
4054 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4055 gfx_v8_0_init_power_gating(adev);
4056 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4057 } else if ((adev->asic_type == CHIP_POLARIS11) ||
4058 (adev->asic_type == CHIP_POLARIS12)) {
4059 gfx_v8_0_init_csb(adev);
4060 gfx_v8_0_init_save_restore_list(adev);
4061 gfx_v8_0_enable_save_restore_machine(adev);
4062 gfx_v8_0_init_power_gating(adev);
/* Halt the RLC F32 core, mask GUI-idle interrupts, then wait for the
 * RLC serdes to drain.
 */
4067 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4069 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4071 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4072 gfx_v8_0_wait_for_rlc_serdes(adev);
/* Pulse the GRBM soft reset bit for the RLC (assert, then deassert). */
4075 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4077 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4080 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
/* Start the RLC F32 core.  On dGPUs the GUI-idle interrupt is enabled
 * here; APUs (Carrizo) defer it until after CP init.
 */
4084 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4086 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4088 /* carrizo do enable cp interrupt after cp inited */
4089 if (!(adev->flags & AMD_IS_APU))
4090 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/*
 * Legacy (non-SMU) RLC microcode load: stream the firmware dwords into
 * RLC_GPM_UCODE_DATA via the auto-incrementing ADDR register, then write
 * the firmware version as the final address.  Fails if no RLC firmware
 * was fetched.
 */
4095 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4097 const struct rlc_firmware_header_v2_0 *hdr;
4098 const __le32 *fw_data;
4099 unsigned i, fw_size;
4101 if (!adev->gfx.rlc_fw)
4104 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4105 amdgpu_ucode_print_rlc_hdr(&hdr->header);
/* payload starts at the header-declared offset; size is in bytes */
4107 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4108 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4109 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4111 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4112 for (i = 0; i < fw_size; i++)
4113 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4114 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
/*
 * Bring the RLC up: stop it, disable coarse/fine-grain clock gating
 * (CGCG/CGLS, plus the 3D variants on Polaris), clear PG control, reset,
 * run PG init, optionally load microcode the legacy way, then start it.
 */
4119 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4124 gfx_v8_0_rlc_stop(adev);
/* disable CGCG/CGLS before touching the RLC */
4127 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4128 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4129 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4130 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4131 if (adev->asic_type == CHIP_POLARIS11 ||
4132 adev->asic_type == CHIP_POLARIS10 ||
4133 adev->asic_type == CHIP_POLARIS12) {
4134 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4136 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
/* disable PG before reset */
4140 WREG32(mmRLC_PG_CNTL, 0);
4142 gfx_v8_0_rlc_reset(adev);
4143 gfx_v8_0_init_pg(adev);
/* SMU-loaded firmware skips the register-banging path */
4146 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4147 /* legacy rlc firmware loading */
4148 r = gfx_v8_0_rlc_load_microcode(adev);
4153 gfx_v8_0_rlc_start(adev);
/*
 * Run or halt the three CP gfx micro-engines (ME/PFP/CE).  When halting,
 * also mark every gfx ring not-ready so submissions stop.
 */
4158 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4161 u32 tmp = RREG32(mmCP_ME_CNTL);
/* enable path: clear all three HALT bits */
4164 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4165 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4166 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
/* disable path: set the HALT bits and quiesce the rings */
4168 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4169 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4170 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4171 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4172 adev->gfx.gfx_ring[i].ready = false;
4174 WREG32(mmCP_ME_CNTL, tmp);
/*
 * Legacy (non-SMU) CP gfx microcode load: halt the CP, then stream the
 * PFP, CE and ME firmware images into their respective ucode/RAM data
 * registers, finishing each with the firmware version as the address.
 * Fails if any of the three firmware images is missing.
 */
4178 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4180 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4181 const struct gfx_firmware_header_v1_0 *ce_hdr;
4182 const struct gfx_firmware_header_v1_0 *me_hdr;
4183 const __le32 *fw_data;
4184 unsigned i, fw_size;
4186 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4189 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4190 adev->gfx.pfp_fw->data;
4191 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4192 adev->gfx.ce_fw->data;
4193 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4194 adev->gfx.me_fw->data;
4196 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4197 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4198 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
/* CP must be halted while its ucode RAM is rewritten */
4200 gfx_v8_0_cp_gfx_enable(adev, false);
/* PFP */
4203 fw_data = (const __le32 *)
4204 (adev->gfx.pfp_fw->data +
4205 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4206 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4207 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4208 for (i = 0; i < fw_size; i++)
4209 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4210 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
/* CE */
4213 fw_data = (const __le32 *)
4214 (adev->gfx.ce_fw->data +
4215 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4216 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4217 WREG32(mmCP_CE_UCODE_ADDR, 0);
4218 for (i = 0; i < fw_size; i++)
4219 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4220 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
/* ME */
4223 fw_data = (const __le32 *)
4224 (adev->gfx.me_fw->data +
4225 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4226 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4227 WREG32(mmCP_ME_RAM_WADDR, 0);
4228 for (i = 0; i < fw_size; i++)
4229 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4230 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
/*
 * Compute the number of ring dwords needed by gfx_v8_0_cp_gfx_start()'s
 * clear-state sequence: preamble/context-control packets, every
 * SECT_CONTEXT extent from vi_cs_data (2 header dwords + payload each),
 * the raster-config pair, and the end-of-clear-state packets.
 */
4235 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4238 const struct cs_section_def *sect = NULL;
4239 const struct cs_extent_def *ext = NULL;
4241 /* begin clear state */
4243 /* context control state */
4246 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4247 for (ext = sect->section; ext->extent != NULL; ++ext) {
4248 if (sect->id == SECT_CONTEXT)
4249 count += 2 + ext->reg_count;
4254 /* pa_sc_raster_config/pa_sc_raster_config1 */
4256 /* end clear state */
/*
 * Start the CP gfx engine and emit the initial clear-state sequence on
 * gfx ring 0: context defaults from vi_cs_data, per-ASIC golden
 * PA_SC_RASTER_CONFIG values, a CLEAR_STATE packet and the CE partition
 * bases.  Ring space is sized by gfx_v8_0_get_csb_size() + 4.
 */
4264 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4266 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4267 const struct cs_section_def *sect = NULL;
4268 const struct cs_extent_def *ext = NULL;
4272 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4273 WREG32(mmCP_ENDIAN_SWAP, 0);
4274 WREG32(mmCP_DEVICE_ID, 1);
4276 gfx_v8_0_cp_gfx_enable(adev, true);
4278 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4280 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4284 /* clear state buffer */
4285 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4286 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4288 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4289 amdgpu_ring_write(ring, 0x80000000);
4290 amdgpu_ring_write(ring, 0x80000000);
/* emit every SECT_CONTEXT extent as a SET_CONTEXT_REG burst */
4292 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4293 for (ext = sect->section; ext->extent != NULL; ++ext) {
4294 if (sect->id == SECT_CONTEXT) {
4295 amdgpu_ring_write(ring,
4296 PACKET3(PACKET3_SET_CONTEXT_REG,
4298 amdgpu_ring_write(ring,
4299 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4300 for (i = 0; i < ext->reg_count; i++)
4301 amdgpu_ring_write(ring, ext->extent[i]);
/* per-ASIC golden raster config pair */
4306 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4307 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4308 switch (adev->asic_type) {
4310 case CHIP_POLARIS10:
4311 amdgpu_ring_write(ring, 0x16000012);
4312 amdgpu_ring_write(ring, 0x0000002A);
4314 case CHIP_POLARIS11:
4315 case CHIP_POLARIS12:
4316 amdgpu_ring_write(ring, 0x16000012);
4317 amdgpu_ring_write(ring, 0x00000000);
4320 amdgpu_ring_write(ring, 0x3a00161a);
4321 amdgpu_ring_write(ring, 0x0000002e);
4324 amdgpu_ring_write(ring, 0x00000002);
4325 amdgpu_ring_write(ring, 0x00000000);
/* value depends on whether only one RB is active */
4328 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4329 0x00000000 : 0x00000002);
4330 amdgpu_ring_write(ring, 0x00000000);
4333 amdgpu_ring_write(ring, 0x00000000);
4334 amdgpu_ring_write(ring, 0x00000000);
4340 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4341 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4343 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4344 amdgpu_ring_write(ring, 0);
4346 /* init the CE partitions */
4347 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4348 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4349 amdgpu_ring_write(ring, 0x8000);
4350 amdgpu_ring_write(ring, 0x8000);
4352 amdgpu_ring_commit(ring);
/* Configure the gfx (CPG) ring-buffer doorbell: point CP_RB_DOORBELL_CONTROL
 * at this ring and enable/disable it, then (dGPU only) program the doorbell
 * aperture range for the gfx ring.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;

	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		/* select this ring's doorbell slot and arm it */
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		/* clear any stale "hit" status before enabling */
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* APUs do not program the CP_RB doorbell range registers */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
/* Bring up the gfx ring buffer (RB0): program size/pointers/base, hook up the
 * write-back addresses, enable the doorbell, then start and self-test the ring.
 * Returns 0 on success or the ring-test error code.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size (register encodes log2 of qwords) */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers;
	 * RB_RPTR_WR_ENA lets us force the rptr to 0 while we reset wptr.
	 */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address wether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	/* drop RB_RPTR_WR_ENA again now that the pointers are reset */
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is 256-byte aligned, register takes addr >> 8 */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
/* Enable or halt both MEC micro-engines (ME1/ME2). On halt, mark every
 * compute ring and the KIQ ring not-ready so nothing submits to them.
 */
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
		adev->gfx.kiq.ring.ready = false;
	}
	/* give the MEC a moment to settle after the state change */
	udelay(50);
}
/* Legacy (non-PSP/SMU) MEC firmware load: halt the compute CP, then stream the
 * MEC1 (and optional MEC2) ucode words into the CP_MEC_ME*_UCODE registers.
 * Returns -EINVAL if no MEC firmware was fetched.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	/* MEC must be halted while its ucode is replaced */
	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1: ADDR auto-increments on each DATA write; the final ADDR write
	 * records the loaded fw version. */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
/* Tell the RLC which hardware queue is the KIQ by encoding me/pipe/queue into
 * the low byte of RLC_CP_SCHEDULERS, then setting the "valid" bit (0x80) in a
 * second write.
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	/* second write sets the "queue active" bit — presumably required as a
	 * two-step handshake by the RLC; TODO confirm against RLC docs */
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
/* Use the KIQ to bring all kernel compute queues (KCQs) online: submit a
 * SET_RESOURCES packet with the queue mask, then one MAP_QUEUES packet per
 * compute ring, and poll a scratch register for completion.
 * Returns 0 on success, -EINVAL on timeout, or a scratch/ring error code.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint32_t scratch, tmp = 0;
	uint64_t queue_mask = 0;
	int r, i;

	/* build the bitmask of MEC queues reserved for kernel use */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	/* 8 dwords per MAP_QUEUES + 7 for SET_RESOURCES + 3 for the fence write */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	/* poll the scratch reg until the KIQ has executed the packet stream */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
/* Deactivate the currently-selected HQD (caller must hold srbm_mutex and have
 * selected the queue via vi_srbm_select): issue a dequeue request of type
 * @req and wait for CP_HQD_ACTIVE to clear, then reset the queue pointers.
 * Returns 0 on success or -ETIMEDOUT if the HQD never went inactive.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	/* clear the request and reset the queue pointers regardless */
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
/* Fill in the memory queue descriptor (MQD) for @ring. The MQD is the CPU-side
 * image of every CP_HQD_* / CP_MQD_* register for this queue; it is later
 * written to the hardware by gfx_v8_0_mqd_commit(). Caller must have selected
 * the queue via vi_srbm_select() since several fields are seeded from the
 * currently-selected HQD's registers.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	/* fixed header/compute-dispatch defaults */
	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* dynamic CU mask lives in the same allocation, right after the MQD */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	/* NOTE(review): the explicit "<< 8" looks redundant given REG_SET_FIELD
	 * also shifts by the field offset — matches upstream, but worth
	 * confirming against the register spec. */
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: snapshot the remaining fields from the current HQD state */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
/* Write the CPU-side MQD image to the currently-selected HQD's registers.
 * Relies on the MQD fields from cp_mqd_base_addr_lo onward mirroring the
 * register file layout, so registers can be programmed by index. Caller must
 * hold srbm_mutex and have selected the queue. Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD (CP_HQD_ACTIVE is written last) */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
/* Initialize (or restore after SR-IOV reset) the KIQ's MQD and commit it to
 * hardware. The KIQ's backup slot is the last entry of mqd_backup[]. Unlike
 * KCQs, the KIQ is programmed directly via MMIO (it can't map itself).
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->in_sriov_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		/* first init: build a fresh MQD and save a pristine backup */
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
/* Initialize a kernel compute queue's MQD. Unlike the KIQ, KCQs are not
 * committed via MMIO here — they are mapped later by the KIQ (MAP_QUEUES in
 * gfx_v8_0_kiq_kcq_enable()). Three paths: first init (build MQD + backup),
 * SR-IOV reset (restore MQD from backup), and resume (just clear the ring).
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->in_sriov_reset && !adev->gfx.in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->in_sriov_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		/* restore from suspend: MQD is intact, just clear the ring */
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
/* Program the MEC doorbell aperture (KIQ..MEC_RING7) on ASICs newer than
 * Tonga, then globally enable compute doorbells.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		/* doorbell registers take the index shifted by 2 */
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
/* Bring up the compute side: enable the MEC, init the KIQ queue via MMIO,
 * init every KCQ's MQD, let the KIQ map the KCQs, then ring-test KIQ and all
 * KCQs. Each MQD BO is reserved/kmapped only for the duration of its init.
 * Returns 0 on success or the first error encountered.
 */
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	/* init the queue descriptors for every kernel compute queue */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	/* have the KIQ map all the KCQs onto the hardware */
	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

	/* Test KIQ */
	ring = &adev->gfx.kiq.ring;
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	/* Test KCQs */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}
/* Resume both command processors: optionally load ucode directly (legacy
 * path), then bring up the gfx ring and the KIQ/compute queues, and re-enable
 * the GUI-idle interrupt. Returns 0 on success.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy firmware loading */
		r = gfx_v8_0_cp_gfx_load_microcode(adev);
		if (r)
			return r;

		r = gfx_v8_0_cp_compute_load_microcode(adev);
		if (r)
			return r;
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_kiq_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
/* Enable or disable both command processors (gfx first, then compute). */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
/* IP-block hw_init callback: apply golden register settings, configure the
 * gfx engine, then resume the RLC and both command processors.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* RLC must be running before the CP is started */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);

	return r;
}
/* Ask the KIQ to unmap (reset) one kernel compute queue identified by its
 * doorbell, and poll a scratch register for completion.
 * Returns 0 on success, -EINVAL on timeout, or a scratch/ring error code.
 */
static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint32_t scratch, tmp = 0;
	int r, i;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);

	r = amdgpu_ring_alloc(kiq_ring, 10);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}

	/* unmap queues */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
	amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
	amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	amdgpu_ring_write(kiq_ring, 0);
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	/* poll until the KIQ has executed the unmap */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
/* IP-block hw_fini callback: drop privileged-fault interrupts, unmap all KCQs
 * via the KIQ, then (bare-metal only) stop the CP/RLC and ungate GFX power.
 * SR-IOV guests must not touch the engines directly.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	/* disable KCQ to avoid CPC touch memory not valid anymore */
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);

	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
5089 static int gfx_v8_0_suspend(void *handle)
5091 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5092 adev->gfx.in_suspend = true;
5093 return gfx_v8_0_hw_fini(adev);
5096 static int gfx_v8_0_resume(void *handle)
5099 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5101 r = gfx_v8_0_hw_init(adev);
5102 adev->gfx.in_suspend = false;
5106 static bool gfx_v8_0_is_idle(void *handle)
5108 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5110 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
/* IP-block wait_for_idle callback: poll gfx_v8_0_is_idle() up to usec_timeout
 * microseconds. Returns 0 when idle, -ETIMEDOUT otherwise.
 */
static int gfx_v8_0_wait_for_idle(void *handle)
{
	unsigned i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v8_0_is_idle(handle))
			return 0;

		udelay(1);
	}
	return -ETIMEDOUT;
}
/* IP-block check_soft_reset callback: inspect GRBM/SRBM status registers and
 * accumulate the per-engine soft-reset bits needed to recover. The computed
 * masks are stashed in adev->gfx for pre/soft/post_soft_reset to consume.
 * Returns true if any reset is required.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS: any busy gfx-pipeline block ⇒ reset CP + GFX + GRBM */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2: RLC and CP fetcher/compute/gfx front-ends */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
/* IP-block pre_soft_reset callback: quiesce the blocks that will be reset.
 * Stops the RLC, halts the gfx CP if its reset bits are set, and dequeues
 * every compute HQD before halting the MEC.
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the rlc */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* dequeue every compute HQD before halting the MEC */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}
/* IP-block soft_reset callback: pulse the accumulated GRBM/SRBM soft-reset
 * bits. GMCON_DEBUG GFX_STALL/GFX_CLEAR bracket the reset to stall/flush the
 * memory-controller interface while the engine is reset.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		/* assert the reset bits, wait, then deassert */
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
/* IP-block post_soft_reset callback: bring the reset blocks back. Restarts
 * the gfx ring if the gfx CP was reset, re-dequeues compute HQDs and resumes
 * the KIQ/compute queues if the compute CP was reset, then restarts the RLC.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* make sure every HQD is quiescent before re-resuming compute */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	/* the capture write latches both 32-bit halves so LSB/MSB are coherent */
	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
/* Emit WRITE_DATA packets that program the per-VMID GDS memory, GWS and OA
 * registers for a GDS switch. Sizes/bases are converted from bytes to the
 * hardware granularity via the AMDGPU_*_SHIFT constants.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base share one register (size in the SIZE field) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: value is a contiguous bitmask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
/* Read one dword from a wave's indirect register space (SQ_IND_*) for the
 * given SIMD/wave/register address. FORCE_READ allows reading a running wave.
 */
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}
/* Bulk-read @num consecutive dwords from the SQ indirect space starting at
 * @regno for a given (simd, wave, thread), using the auto-increment mode so
 * only one index write is needed.
 */
static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	/* AUTO_INCR advances the index after each data read */
	while (num--)
		*(out++) = RREG32(mmSQ_IND_DATA);
}
5427 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5429 /* type 0 wave data */
5430 dst[(*no_fields)++] = 0;
5431 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5432 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5433 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5434 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5435 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5436 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5437 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5438 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5439 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5440 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5441 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5442 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5443 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5444 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5445 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5446 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5447 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5448 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
/* Read @size SGPRs (starting at @start) for one wave into @dst.
 * SGPRs live in the SQ indirect space at SQIND_WAVE_SGPRS_OFFSET.
 */
static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
5461 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5462 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5463 .select_se_sh = &gfx_v8_0_select_se_sh,
5464 .read_wave_data = &gfx_v8_0_read_wave_data,
5465 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
/* IP-block early init: set ring counts and install the GFXv8 function
 * tables (gfx/ring/irq/gds/rlc) before any hardware access happens.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
/* IP-block late init: enable the privileged register/instruction fault
 * interrupts, run the EDC GPR workarounds (needs the IB pool, hence late
 * init), then gate GFX power.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
/* Toggle static per-CU medium-grain power gating. Polaris11/12 also need
 * the SMU notified through powerplay before flipping the RLC bit.
 */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if ((adev->asic_type == CHIP_POLARIS11) ||
	    (adev->asic_type == CHIP_POLARIS12))
		/* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
/* Toggle dynamic per-CU medium-grain power gating in RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
/* Toggle "quick" medium-grain power gating (Polaris11-family feature). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
/* Toggle coarse-grain GFX power gating (Carrizo/Stoney path). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
/* Toggle GFX pipeline power gating, then poke a GFX register so the block
 * wakes back up and the write actually lands.
 */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (enable)
		RREG32(mmDB_RENDER_CONTROL);
}
/* Enable CG power gating (plus pipeline PG when supported) only when both
 * the PG flag is set and the caller asked to gate; otherwise force both off.
 */
static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
		cz_enable_gfx_cg_power_gating(adev, true);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
			cz_enable_gfx_pipeline_power_gating(adev, true);
	} else {
		cz_enable_gfx_cg_power_gating(adev, false);
		cz_enable_gfx_pipeline_power_gating(adev, false);
	}
}
/* IP-block powergating entry point. Per-ASIC dispatch: Carrizo/Stoney get
 * the SCK slow-down + CP PG + CG PG path; Polaris11/12 get SMG/DMG/quick-MG.
 * Each feature is driven to the requested state only if its PG flag is set.
 * SR-IOV VFs must not touch PG state. Always returns 0.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}

	return 0;
}
/* Report which GFX clockgating features are currently active by sampling
 * the live registers; OR the corresponding AMD_CG_SUPPORT_* bits into
 * *flags. Override bits are active-low ("not overridden" == gated).
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
/* Broadcast a BPM serdes command (@cmd) at @reg_addr to all CU and non-CU
 * masters across every SE/SH. Stoney's RLC_SERDES_WR_CTRL lacks the
 * BPM_DATA/REG_ADDR fields in its clear mask, hence the ASIC split.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast: select all SEs/SHs */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
/* RLC safe-mode request messages, plus the RLC_GPR_REG2 REQ/MESSAGE field
 * layout used to post them (not provided by the generated register headers).
 */
#define MSG_ENTER_RLC_SAFE_MODE      1
#define MSG_EXIT_RLC_SAFE_MODE       0
#define RLC_GPR_REG2__REQ_MASK       0x00000001
#define RLC_GPR_REG2__REQ__SHIFT     0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK   0x0000001e
/* Ask the RLC firmware to enter safe mode (required before touching CG/PG
 * state). No-op when the RLC F32 core is not running or neither CGCG nor
 * MGCG is enabled. Polls until GFX reports clocked+powered and the CMD bit
 * self-clears, bounded by adev->usec_timeout.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* message 1 == enter safe mode */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for GFX clock and power status to assert */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to acknowledge (CMD self-clears) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
/* Counterpart of iceland_enter_rlc_safe_mode(): post the exit message
 * (MESSAGE field cleared == exit) if we are in safe mode, then wait for the
 * CMD bit to self-clear.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
/* RLC safe-mode hooks used by the clock/power gating update paths. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
/* Enable or disable medium-grain clock gating (MGCG) plus the related
 * memory light-sleep and CGTS tree-shade features. The numbered steps are
 * a hardware-mandated ordering; the whole sequence runs inside RLC safe
 * mode. Registers are only rewritten when the value actually changed.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			/* APUs keep the GRBM override set */
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
/* Enable or disable coarse-grain clock gating (CGCG) and, when supported,
 * coarse-grain light sleep (CGLS). Ordering matters: overrides are cleared
 * and serdes commands issued before the enable bits flip, and GUI idle
 * interrupts must be on while CGCG is active. Runs inside RLC safe mode.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* set the CGCG/CGLS overrides */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
/* Toggle MGCG and CGCG together in the hardware-required order:
 * MG before CG when enabling, CG before MG when disabling.
 */
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * ===  MGCG + MGLS + TS(CG/LS) ===
		 */
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * ===  CGCG + CGLS ===
		 */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}
/* Tonga delegates GFX clockgating to the SMU: build a PP_CG_MSG_ID for the
 * CG block and the MG block from the supported cg_flags and send each via
 * powerplay. On UNGATE the requested state is forced to 0 (all off).
 */
static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
					  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
/* Polaris variant of the SMU-driven clockgating update: in addition to the
 * CG and MG blocks it programs the 3D block and the RLC/CP light-sleep
 * blocks, each as a separate powerplay message. On UNGATE the requested
 * state is forced to 0 (all off).
 */
static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
						    enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_3D,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_RLC,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;
		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CP,
				pp_support_state,
				pp_state);
		if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
			amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
	}

	return 0;
}
/* IP-block clockgating entry point: dispatch per ASIC family. Direct
 * register programming for Fiji/Carrizo/Stoney, SMU messages for Tonga
 * and Polaris. SR-IOV VFs must not touch CG state. Always returns 0.
 */
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}
6175 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6177 return ring->adev->wb.wb[ring->rptr_offs];
6180 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6182 struct amdgpu_device *adev = ring->adev;
6184 if (ring->use_doorbell)
6185 /* XXX check if swapping is necessary on BE */
6186 return ring->adev->wb.wb[ring->wptr_offs];
6188 return RREG32(mmCP_RB0_WPTR);
/* Publish the GFX ring write pointer: via writeback slot + doorbell when
 * enabled, otherwise by writing CP_RB0_WPTR (the read-back flushes it).
 */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		/* read back to flush the posted register write */
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
/* Emit an HDP flush: request the flush via GPU_HDP_FLUSH_REQ and wait on
 * the matching done bit. Compute/KIQ rings use their per-ME/pipe CP bit
 * and the ME engine; the GFX ring uses CP0 and waits on the PFP.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
/* Emit a VS partial flush followed by a VGT flush event. */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
/* Invalidate the HDP read cache by writing 1 to HDP_DEBUG0 (the documented
 * trigger on VI), with write confirmation.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);
}
/* Emit an indirect buffer on the GFX ring. CE IBs use INDIRECT_BUFFER_CONST.
 * Under SR-IOV with preemptible IBs, mark the IB preempt-enabled and emit
 * DE metadata first (DE IBs only).
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* length in dwords plus the target VMID in bits 24+ */
	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
/* Emit an indirect buffer on a compute ring (always a plain
 * INDIRECT_BUFFER packet, marked valid, with length and VMID in control).
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
/* Emit a GFX fence: EVENT_WRITE_EOP flushes caches, writes the 32/64-bit
 * @seq to @addr and optionally raises an interrupt, per @flags.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6329 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6331 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6332 uint32_t seq = ring->fence_drv.sync_seq;
6333 uint64_t addr = ring->fence_drv.gpu_addr;
6335 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6336 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6337 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6338 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6339 amdgpu_ring_write(ring, addr & 0xfffffffc);
6340 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6341 amdgpu_ring_write(ring, seq);
6342 amdgpu_ring_write(ring, 0xffffffff);
6343 amdgpu_ring_write(ring, 4); /* poll interval */
/* Emit a VM flush: write the page-directory base for @vm_id (contexts 0-7
 * and 8-15 live in different register banks), request a TLB invalidate for
 * that VMID, wait for it to complete, and on GFX rings sync PFP to ME.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	/* page directory base is stored as a 4KB-page frame number */
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}
6393 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6395 return ring->adev->wb.wb[ring->wptr_offs];
6398 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6400 struct amdgpu_device *adev = ring->adev;
6402 /* XXX check if swapping is necessary on BE */
6403 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6404 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
/* Set a pipe's wave-launch quota: full quota (VALUE mask) when acquiring
 * priority, minimal (0x1) otherwise, via SPI_WCL_PIPE_PERCENT_*.
 */
static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
					   bool acquire)
{
	struct amdgpu_device *adev = ring->adev;
	int pipe_num, tmp, reg;
	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;

	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;

	/* first me only has 2 entries, GFX and HP3D */
	if (ring->me > 0)
		pipe_num -= 2;

	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
	tmp = RREG32(reg);
	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
	WREG32(reg, tmp);
}
/* Track per-pipe priority reservations under pipe_reserve_mutex. When the
 * reservation bitmap becomes empty, every ring gets its full wave quota
 * back; otherwise every pipe without a reservation is throttled.
 */
static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
					    struct amdgpu_ring *ring,
					    bool acquire)
{
	int i, pipe;
	bool reserve;
	struct amdgpu_ring *iring;

	mutex_lock(&adev->gfx.pipe_reserve_mutex);
	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
	if (acquire)
		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
	else
		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);

	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
		/* Clear all reservations - everyone reacquires all resources */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
						       true);

		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
						       true);
	} else {
		/* Lower all pipes without a current reservation */
		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
			iring = &adev->gfx.gfx_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}

		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
			iring = &adev->gfx.compute_ring[i];
			pipe = amdgpu_gfx_queue_to_bit(adev,
						       iring->me,
						       iring->pipe,
						       0);
			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
		}
	}

	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
/* Program a ring's HQD pipe/queue priority via SRBM-indexed registers,
 * under srbm_mutex. Acquire uses priority 0x2/0xf, release resets to 0.
 */
static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
				      struct amdgpu_ring *ring,
				      bool acquire)
{
	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
	uint32_t queue_priority = acquire ? 0xf : 0x0;

	mutex_lock(&adev->srbm_mutex);
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);

	/* deselect to avoid leaking the SRBM selection */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
/* Scheduler callback: raise or drop a compute ring's hardware priority.
 * Only AMD_SCHED_PRIORITY_HIGH_HW counts as "acquire"; non-compute rings
 * are ignored.
 */
static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
					       enum amd_sched_priority priority)
{
	struct amdgpu_device *adev = ring->adev;
	bool acquire = priority == AMD_SCHED_PRIORITY_HIGH_HW;

	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
		return;

	gfx_v8_0_hqd_set_priority(adev, ring, acquire);
	gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
}
6505 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6509 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6510 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6512 /* RELEASE_MEM - flush caches, send int */
6513 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6514 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6516 EOP_TC_WB_ACTION_EN |
6517 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6519 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6520 amdgpu_ring_write(ring, addr & 0xfffffffc);
6521 amdgpu_ring_write(ring, upper_32_bits(addr));
6522 amdgpu_ring_write(ring, lower_32_bits(seq));
6523 amdgpu_ring_write(ring, upper_32_bits(seq));
6526 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6527 u64 seq, unsigned int flags)
6529 /* we only allocate 32bit for each seq wb address */
6530 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6532 /* write fence seq to the "addr" */
6533 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6534 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6535 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6536 amdgpu_ring_write(ring, lower_32_bits(addr));
6537 amdgpu_ring_write(ring, upper_32_bits(addr));
6538 amdgpu_ring_write(ring, lower_32_bits(seq));
6540 if (flags & AMDGPU_FENCE_FLAG_INT) {
6541 /* set register to trigger INT */
6542 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6543 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6544 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6545 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6546 amdgpu_ring_write(ring, 0);
6547 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6551 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6553 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6554 amdgpu_ring_write(ring, 0);
6557 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6561 if (amdgpu_sriov_vf(ring->adev))
6562 gfx_v8_0_ring_emit_ce_meta(ring);
6564 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6565 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6566 gfx_v8_0_ring_emit_vgt_flush(ring);
6567 /* set load_global_config & load_global_uconfig */
6569 /* set load_cs_sh_regs */
6571 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6574 /* set load_ce_ram if preamble presented */
6575 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6578 /* still load_ce_ram if this is the first time preamble presented
6579 * although there is no context switch happens.
6581 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6585 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6586 amdgpu_ring_write(ring, dw2);
6587 amdgpu_ring_write(ring, 0);
6590 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6594 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6595 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6596 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6597 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
6598 ret = ring->wptr & ring->buf_mask;
6599 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6603 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6607 BUG_ON(offset > ring->buf_mask);
6608 BUG_ON(ring->ring[offset] != 0x55aa55aa);
6610 cur = (ring->wptr & ring->buf_mask) - 1;
6611 if (likely(cur > offset))
6612 ring->ring[offset] = cur - offset;
6614 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6617 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6619 struct amdgpu_device *adev = ring->adev;
6621 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6622 amdgpu_ring_write(ring, 0 | /* src: register*/
6623 (5 << 8) | /* dst: memory */
6624 (1 << 20)); /* write confirm */
6625 amdgpu_ring_write(ring, reg);
6626 amdgpu_ring_write(ring, 0);
6627 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6628 adev->virt.reg_val_offs * 4));
6629 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6630 adev->virt.reg_val_offs * 4));
6633 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6636 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6637 amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
6638 amdgpu_ring_write(ring, reg);
6639 amdgpu_ring_write(ring, 0);
6640 amdgpu_ring_write(ring, val);
6643 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6644 enum amdgpu_interrupt_state state)
6646 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6647 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6650 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6652 enum amdgpu_interrupt_state state)
6654 u32 mec_int_cntl, mec_int_cntl_reg;
6657 * amdgpu controls only the first MEC. That's why this function only
6658 * handles the setting of interrupts for this specific MEC. All other
6659 * pipes' interrupts are set by amdkfd.
6665 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6668 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6671 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6674 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6677 DRM_DEBUG("invalid pipe %d\n", pipe);
6681 DRM_DEBUG("invalid me %d\n", me);
6686 case AMDGPU_IRQ_STATE_DISABLE:
6687 mec_int_cntl = RREG32(mec_int_cntl_reg);
6688 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6689 WREG32(mec_int_cntl_reg, mec_int_cntl);
6691 case AMDGPU_IRQ_STATE_ENABLE:
6692 mec_int_cntl = RREG32(mec_int_cntl_reg);
6693 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6694 WREG32(mec_int_cntl_reg, mec_int_cntl);
6701 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6702 struct amdgpu_irq_src *source,
6704 enum amdgpu_interrupt_state state)
6706 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6707 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6712 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6713 struct amdgpu_irq_src *source,
6715 enum amdgpu_interrupt_state state)
6717 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6718 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6723 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6724 struct amdgpu_irq_src *src,
6726 enum amdgpu_interrupt_state state)
6729 case AMDGPU_CP_IRQ_GFX_EOP:
6730 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6732 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6733 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6735 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6736 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6738 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6739 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6741 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6742 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6744 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6745 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6747 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6748 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6750 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6751 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6753 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6754 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6762 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6763 struct amdgpu_irq_src *source,
6764 struct amdgpu_iv_entry *entry)
6767 u8 me_id, pipe_id, queue_id;
6768 struct amdgpu_ring *ring;
6770 DRM_DEBUG("IH: CP EOP\n");
6771 me_id = (entry->ring_id & 0x0c) >> 2;
6772 pipe_id = (entry->ring_id & 0x03) >> 0;
6773 queue_id = (entry->ring_id & 0x70) >> 4;
6777 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6781 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6782 ring = &adev->gfx.compute_ring[i];
6783 /* Per-queue interrupt is supported for MEC starting from VI.
6784 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6786 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6787 amdgpu_fence_process(ring);
6794 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6795 struct amdgpu_irq_src *source,
6796 struct amdgpu_iv_entry *entry)
6798 DRM_ERROR("Illegal register access in command stream\n");
6799 schedule_work(&adev->reset_work);
6803 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6804 struct amdgpu_irq_src *source,
6805 struct amdgpu_iv_entry *entry)
6807 DRM_ERROR("Illegal instruction in command stream\n");
6808 schedule_work(&adev->reset_work);
6812 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6813 struct amdgpu_irq_src *src,
6815 enum amdgpu_interrupt_state state)
6817 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6820 case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6821 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
6822 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6824 WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
6826 GENERIC2_INT_ENABLE,
6827 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6829 WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
6831 GENERIC2_INT_ENABLE,
6832 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6835 BUG(); /* kiq only support GENERIC2_INT now */
6841 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6842 struct amdgpu_irq_src *source,
6843 struct amdgpu_iv_entry *entry)
6845 u8 me_id, pipe_id, queue_id;
6846 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6848 me_id = (entry->ring_id & 0x0c) >> 2;
6849 pipe_id = (entry->ring_id & 0x03) >> 0;
6850 queue_id = (entry->ring_id & 0x70) >> 4;
6851 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6852 me_id, pipe_id, queue_id);
6854 amdgpu_fence_process(ring);
6858 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6860 .early_init = gfx_v8_0_early_init,
6861 .late_init = gfx_v8_0_late_init,
6862 .sw_init = gfx_v8_0_sw_init,
6863 .sw_fini = gfx_v8_0_sw_fini,
6864 .hw_init = gfx_v8_0_hw_init,
6865 .hw_fini = gfx_v8_0_hw_fini,
6866 .suspend = gfx_v8_0_suspend,
6867 .resume = gfx_v8_0_resume,
6868 .is_idle = gfx_v8_0_is_idle,
6869 .wait_for_idle = gfx_v8_0_wait_for_idle,
6870 .check_soft_reset = gfx_v8_0_check_soft_reset,
6871 .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6872 .soft_reset = gfx_v8_0_soft_reset,
6873 .post_soft_reset = gfx_v8_0_post_soft_reset,
6874 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6875 .set_powergating_state = gfx_v8_0_set_powergating_state,
6876 .get_clockgating_state = gfx_v8_0_get_clockgating_state,
6879 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6880 .type = AMDGPU_RING_TYPE_GFX,
6882 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6883 .support_64bit_ptrs = false,
6884 .get_rptr = gfx_v8_0_ring_get_rptr,
6885 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6886 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6887 .emit_frame_size = /* maximum 215dw if count 16 IBs in */
6889 7 + /* PIPELINE_SYNC */
6891 8 + /* FENCE for VM_FLUSH */
6892 20 + /* GDS switch */
6893 4 + /* double SWITCH_BUFFER,
6894 the first COND_EXEC jump to the place just
6895 prior to this double SWITCH_BUFFER */
6903 8 + 8 + /* FENCE x2 */
6904 2, /* SWITCH_BUFFER */
6905 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6906 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6907 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6908 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6909 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6910 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6911 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6912 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6913 .test_ring = gfx_v8_0_ring_test_ring,
6914 .test_ib = gfx_v8_0_ring_test_ib,
6915 .insert_nop = amdgpu_ring_insert_nop,
6916 .pad_ib = amdgpu_ring_generic_pad_ib,
6917 .emit_switch_buffer = gfx_v8_ring_emit_sb,
6918 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6919 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6920 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6923 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6924 .type = AMDGPU_RING_TYPE_COMPUTE,
6926 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6927 .support_64bit_ptrs = false,
6928 .get_rptr = gfx_v8_0_ring_get_rptr,
6929 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6930 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6932 20 + /* gfx_v8_0_ring_emit_gds_switch */
6933 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6934 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6935 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6936 17 + /* gfx_v8_0_ring_emit_vm_flush */
6937 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6938 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6939 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6940 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6941 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6942 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6943 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6944 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6945 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6946 .test_ring = gfx_v8_0_ring_test_ring,
6947 .test_ib = gfx_v8_0_ring_test_ib,
6948 .insert_nop = amdgpu_ring_insert_nop,
6949 .pad_ib = amdgpu_ring_generic_pad_ib,
6950 .set_priority = gfx_v8_0_ring_set_priority_compute,
6953 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6954 .type = AMDGPU_RING_TYPE_KIQ,
6956 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6957 .support_64bit_ptrs = false,
6958 .get_rptr = gfx_v8_0_ring_get_rptr,
6959 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6960 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6962 20 + /* gfx_v8_0_ring_emit_gds_switch */
6963 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6964 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6965 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6966 17 + /* gfx_v8_0_ring_emit_vm_flush */
6967 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6968 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6969 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6970 .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6971 .test_ring = gfx_v8_0_ring_test_ring,
6972 .test_ib = gfx_v8_0_ring_test_ib,
6973 .insert_nop = amdgpu_ring_insert_nop,
6974 .pad_ib = amdgpu_ring_generic_pad_ib,
6975 .emit_rreg = gfx_v8_0_ring_emit_rreg,
6976 .emit_wreg = gfx_v8_0_ring_emit_wreg,
6979 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6983 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6985 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6986 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6988 for (i = 0; i < adev->gfx.num_compute_rings; i++)
6989 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6992 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6993 .set = gfx_v8_0_set_eop_interrupt_state,
6994 .process = gfx_v8_0_eop_irq,
6997 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6998 .set = gfx_v8_0_set_priv_reg_fault_state,
6999 .process = gfx_v8_0_priv_reg_irq,
7002 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7003 .set = gfx_v8_0_set_priv_inst_fault_state,
7004 .process = gfx_v8_0_priv_inst_irq,
7007 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
7008 .set = gfx_v8_0_kiq_set_interrupt_state,
7009 .process = gfx_v8_0_kiq_irq,
7012 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7014 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7015 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7017 adev->gfx.priv_reg_irq.num_types = 1;
7018 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7020 adev->gfx.priv_inst_irq.num_types = 1;
7021 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7023 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7024 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
7027 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7029 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
7032 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7034 /* init asci gds info */
7035 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7036 adev->gds.gws.total_size = 64;
7037 adev->gds.oa.total_size = 16;
7039 if (adev->gds.mem.total_size == 64 * 1024) {
7040 adev->gds.mem.gfx_partition_size = 4096;
7041 adev->gds.mem.cs_partition_size = 4096;
7043 adev->gds.gws.gfx_partition_size = 4;
7044 adev->gds.gws.cs_partition_size = 4;
7046 adev->gds.oa.gfx_partition_size = 4;
7047 adev->gds.oa.cs_partition_size = 1;
7049 adev->gds.mem.gfx_partition_size = 1024;
7050 adev->gds.mem.cs_partition_size = 1024;
7052 adev->gds.gws.gfx_partition_size = 16;
7053 adev->gds.gws.cs_partition_size = 16;
7055 adev->gds.oa.gfx_partition_size = 4;
7056 adev->gds.oa.cs_partition_size = 4;
7060 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7068 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7069 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7071 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7074 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7078 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7079 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7081 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7083 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7086 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7088 int i, j, k, counter, active_cu_number = 0;
7089 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7090 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7091 unsigned disable_masks[4 * 2];
7094 memset(cu_info, 0, sizeof(*cu_info));
7096 if (adev->flags & AMD_IS_APU)
7099 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7101 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7103 mutex_lock(&adev->grbm_idx_mutex);
7104 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7105 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7109 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7111 gfx_v8_0_set_user_cu_inactive_bitmap(
7112 adev, disable_masks[i * 2 + j]);
7113 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7114 cu_info->bitmap[i][j] = bitmap;
7116 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
7117 if (bitmap & mask) {
7118 if (counter < ao_cu_num)
7124 active_cu_number += counter;
7126 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7127 cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7130 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7131 mutex_unlock(&adev->grbm_idx_mutex);
7133 cu_info->number = active_cu_number;
7134 cu_info->ao_cu_mask = ao_cu_mask;
7137 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7139 .type = AMD_IP_BLOCK_TYPE_GFX,
7143 .funcs = &gfx_v8_0_ip_funcs,
7146 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7148 .type = AMD_IP_BLOCK_TYPE_GFX,
7152 .funcs = &gfx_v8_0_ip_funcs,
7155 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7157 uint64_t ce_payload_addr;
7160 struct vi_ce_ib_state regular;
7161 struct vi_ce_ib_state_chained_ib chained;
7164 if (ring->adev->virt.chained_ib_support) {
7165 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
7166 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7167 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7169 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
7170 offsetof(struct vi_gfx_meta_data, ce_payload);
7171 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7174 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7175 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7176 WRITE_DATA_DST_SEL(8) |
7178 WRITE_DATA_CACHE_POLICY(0));
7179 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7180 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7181 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7184 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7186 uint64_t de_payload_addr, gds_addr, csa_addr;
7189 struct vi_de_ib_state regular;
7190 struct vi_de_ib_state_chained_ib chained;
7193 csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
7194 gds_addr = csa_addr + 4096;
7195 if (ring->adev->virt.chained_ib_support) {
7196 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7197 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7198 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7199 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7201 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7202 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7203 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7204 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7207 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7208 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7209 WRITE_DATA_DST_SEL(8) |
7211 WRITE_DATA_CACHE_POLICY(0));
7212 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7213 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7214 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);