Merge tag 'leds_for_4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszewsk...
[sfrench/cifs-2.6.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "atombios_i2c.h"
32 #include "clearstate_vi.h"
33
34 #include "gmc/gmc_8_2_d.h"
35 #include "gmc/gmc_8_2_sh_mask.h"
36
37 #include "oss/oss_3_0_d.h"
38 #include "oss/oss_3_0_sh_mask.h"
39
40 #include "bif/bif_5_0_d.h"
41 #include "bif/bif_5_0_sh_mask.h"
42
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50
51 #include "smu/smu_7_1_3_d.h"
52
/* Ring counts for GFX v8, per the macro names: one graphics ring and eight compute rings. */
53 #define GFX8_NUM_GFX_RINGS     1
54 #define GFX8_NUM_COMPUTE_RINGS 8
55
/*
 * Per-ASIC golden values for GB_ADDR_CONFIG.  Topaz and Carrizo share the
 * same value; Polaris11 and Tonga differ from each other only in the low bits.
 * NOTE(review): the code that consumes these macros is outside this excerpt.
 */
56 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
59 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
60
/*
 * Field-placement helpers: each macro shifts its argument left by the
 * matching GB_TILE_MODE0 / GB_MACROTILE_MODE0 *__SHIFT constant.  No masking
 * is applied, so the caller must pass a value that already fits the field.
 */
61 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
62 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
63 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
64 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
65 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
66 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
67 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
68 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
69 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
70
/*
 * Single-bit masks for RLC_CGTT_MGCG_OVERRIDE, covering bits 0 through 5
 * (CPF, RLC, MGCG, CGCG, CGLS, GRBM in that order).
 */
71 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
72 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
73 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
74 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
76 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
77
78 /* BPM SERDES command codes: 1 = set, 0 = clear ("CLE" presumably abbreviates "clear") */
79 #define SET_BPM_SERDES_CMD    1
80 #define CLE_BPM_SERDES_CMD    0
81
82 /* BPM register addresses; enumerators are consecutive, starting at 0. */
83 enum {
84         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
85         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
86         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
87         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
88         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
89         BPM_REG_FGCG_MAX            /* one past the last address above (== 5) */
90 };
91
/* NOTE(review): presumably the entry count of an RLC "direct" register list; the consumer is outside this excerpt. */
92 #define RLC_FormatDirectRegListLength        14
93
/*
 * Firmware images declared via MODULE_FIRMWARE, grouped per ASIC.  Every
 * ASIC lists ce, pfp, me, mec and rlc images; carrizo, tonga, fiji,
 * polaris11 and polaris10 additionally list mec2, while stoney and topaz
 * do not.
 */
94 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
100
101 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
113
114 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
119
120 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
126
127 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
133
134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140
/*
 * GDS register offsets for VMID 0..15, one entry per VMID in index order.
 * Each entry lists that VMID's BASE, SIZE, GWS and OA registers; the field
 * names of struct amdgpu_gds_reg_offset are declared outside this excerpt.
 */
141 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
142 {
143         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
144         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
145         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
146         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
147         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
148         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
149         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
150         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
151         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
152         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
153         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
154         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
155         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
156         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
157         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
158         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
159 };
160
/*
 * Tonga "a11" golden register settings.  Flat u32 array, three values per
 * row: a register offset followed by two 32-bit constants -- presumably an
 * AND mask and an OR value as consumed by amdgpu_program_register_sequence();
 * confirm against that helper, which is outside this excerpt.
 */
161 static const u32 golden_settings_tonga_a11[] =
162 {
163         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
164         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
165         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
166         mmGB_GPU_ID, 0x0000000f, 0x00000000,
167         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
168         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
169         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
170         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
171         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
172         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
173         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
174         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
175         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
176         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
177         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
178 };
179
/*
 * Tonga common golden settings.  Three u32 values per row (register offset,
 * then two constants -- presumably mask and value; confirm against the
 * consumer).  Every row uses 0xffffffff as its second value.  The
 * GRBM_GFX_INDEX row comes first, ahead of the raster/address/SPI rows.
 */
180 static const u32 tonga_golden_common_all[] =
181 {
182         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
183         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
184         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
185         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
186         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
187         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
188         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
189         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
190 };
191
/*
 * Tonga clock-gating (MGCG/CGCG, per the name) init table.  Three u32
 * values per row: register offset, then two constants -- presumably mask
 * and value; confirm against the consumer, which is outside this excerpt.
 *
 * Layout: RLC_CGTT_MGCG_OVERRIDE first, then the CGTT and related
 * clock-control registers, then per-CU CGTS registers for CU0..CU7, then the
 * trailing SM/CP/RLC rows.  GRBM_GFX_INDEX rows precede both register groups.
 * NOTE(review): row order is presumably significant -- do not reorder.
 */
192 static const u32 tonga_mgcg_cgcg_init[] =
193 {
194         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
195         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
196         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
197         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
198         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
199         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
200         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
201         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
202         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
203         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
204         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
205         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
206         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
207         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
208         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
209         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
210         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
211         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
212         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
213         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
214         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
215         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
216         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
217         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
218         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
219         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
220         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
221         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
222         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
223         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
224         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
225         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
226         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
227         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
228         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
229         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
230         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
231         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
232         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
233         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
234         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
235         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
236         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
237         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
238         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
239         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
240         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
241         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
242         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
243         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
244         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
245         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
246         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
247         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
248         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
249         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
250         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
251         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
252         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
253         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
254         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
255         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
256         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
257         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
258         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
259         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
260         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
261         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
262         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
263         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
264         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
265         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
266         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
267         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
268         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
269 };
270
/*
 * Polaris11 "a11" golden register settings.  Three u32 values per row: a
 * register offset followed by two constants -- presumably an AND mask and an
 * OR value; confirm against the consumer, which is outside this excerpt.
 */
271 static const u32 golden_settings_polaris11_a11[] =
272 {
273         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
274         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
275         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
276         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
277         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
278         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
279         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
280         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
281         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
282         mmSQ_CONFIG, 0x07f80000, 0x07180000,
283         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
284         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
285         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
286         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
287         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
288         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
289 };
290
/*
 * Polaris11 common golden settings.  Three u32 values per row (register
 * offset, then two constants -- presumably mask and value; confirm against
 * the consumer).  Unlike the other *_golden_common_all tables in this file,
 * this one has no PA_SC_RASTER_CONFIG rows.
 */
291 static const u32 polaris11_golden_common_all[] =
292 {
293         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
294         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
295         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
296         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
297         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
298         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
299 };
300
/*
 * Polaris10 "a11" golden register settings.  Three u32 values per row: a
 * register offset followed by two constants -- presumably an AND mask and an
 * OR value; confirm against the consumer, which is outside this excerpt.
 */
301 static const u32 golden_settings_polaris10_a11[] =
302 {
303         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
304         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
305         mmCB_HW_CONTROL_2, 0, 0x0f000000, /* NOTE(review): only row whose second value is 0; effect depends on how the consumer treats a zero mask -- confirm */
306         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
307         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
308         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
309         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
310         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
311         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
312         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
313         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
314         mmSQ_CONFIG, 0x07f80000, 0x07180000,
315         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
316         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
317         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
318         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
319         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
320 };
321
/*
 * Polaris10 common golden settings.  Three u32 values per row (register
 * offset, then two constants -- presumably mask and value; confirm against
 * the consumer).  Every row uses 0xffffffff as its second value.
 */
322 static const u32 polaris10_golden_common_all[] =
323 {
324         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
325         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
326         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
327         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
328         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
329         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
330         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
331         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
332 };
333
/*
 * Fiji common golden settings.  Three u32 values per row (register offset,
 * then two constants -- presumably mask and value; confirm against the
 * consumer).  GRBM_GFX_INDEX appears twice: once at the top and again just
 * before the final SPI_CONFIG_CNTL_1 row.
 */
334 static const u32 fiji_golden_common_all[] =
335 {
336         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
337         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
338         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
339         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
340         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
341         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
342         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
343         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
344         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
345         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
346 };
347
/*
 * Fiji "a10" golden register settings.  Three u32 values per row: a
 * register offset followed by two constants -- presumably an AND mask and an
 * OR value; confirm against the consumer, which is outside this excerpt.
 */
348 static const u32 golden_settings_fiji_a10[] =
349 {
350         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
351         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
352         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
353         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
354         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
355         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
356         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
357         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
358         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
359         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
360         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
361 };
362
/*
 * Fiji clock-gating (MGCG/CGCG, per the name) init table, passed to
 * amdgpu_program_register_sequence() for CHIP_FIJI in
 * gfx_v8_0_init_golden_registers().  Three u32 values per row: register
 * offset, then two constants -- presumably mask and value; confirm against
 * that helper.
 *
 * This table contains no per-CU CGTS_CUn_* rows; after the second
 * GRBM_GFX_INDEX row it goes straight to the SM/CP/RLC rows.
 * NOTE(review): row order is presumably significant -- do not reorder.
 */
363 static const u32 fiji_mgcg_cgcg_init[] =
364 {
365         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
366         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
367         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
368         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
369         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
370         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
371         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
372         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
373         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
374         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
375         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
376         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
377         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
378         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
379         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
380         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
381         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
382         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
383         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
384         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
385         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
386         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
387         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
388         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
389         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
390         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
391         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
392         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
393         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
394         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
395         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
396         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
397         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
398         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
399         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
400 };
401
/*
 * Iceland "a11" golden register settings, passed to
 * amdgpu_program_register_sequence() for CHIP_TOPAZ in
 * gfx_v8_0_init_golden_registers().  Three u32 values per row: a register
 * offset followed by two constants -- presumably an AND mask and an OR
 * value; confirm against that helper.
 */
402 static const u32 golden_settings_iceland_a11[] =
403 {
404         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
405         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
406         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
407         mmGB_GPU_ID, 0x0000000f, 0x00000000,
408         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
409         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
410         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
411         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
412         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
413         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
414         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
415         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
416         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
417         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
418         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
419 };
420
/*
 * Iceland common golden settings, passed to
 * amdgpu_program_register_sequence() for CHIP_TOPAZ in
 * gfx_v8_0_init_golden_registers().  Three u32 values per row (register
 * offset, then two constants -- presumably mask and value; confirm against
 * that helper).  Every row uses 0xffffffff as its second value.
 */
421 static const u32 iceland_golden_common_all[] =
422 {
423         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
424         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
425         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
426         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
427         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
428         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
429         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
430         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
431 };
432
/*
 * Iceland clock-gating (MGCG/CGCG, per the name) init table, passed to
 * amdgpu_program_register_sequence() for CHIP_TOPAZ in
 * gfx_v8_0_init_golden_registers().  Three u32 values per row: register
 * offset, then two constants -- presumably mask and value; confirm against
 * that helper.
 *
 * Layout: RLC_CGTT_MGCG_OVERRIDE first, then the CGTT and related
 * clock-control registers, then per-CU CGTS registers for CU0..CU5 only,
 * then the trailing SM/CP/RLC rows.  There is no CP_MEM_SLP_CNTL row here.
 * NOTE(review): row order is presumably significant -- do not reorder.
 */
433 static const u32 iceland_mgcg_cgcg_init[] =
434 {
435         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
436         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
437         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
439         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
440         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
441         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
442         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
444         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
446         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
448         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
449         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
450         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
451         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
452         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
453         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
454         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
455         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
456         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
457         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
458         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
459         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
460         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
461         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
462         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
463         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
464         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
465         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
466         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
467         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
468         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
469         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
470         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
471         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
472         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
473         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
474         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
475         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
476         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
477         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
478         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
479         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
480         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
481         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
482         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
483         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
484         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
485         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
486         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
487         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
488         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
489         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
490         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
491         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
492         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
493         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
494         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
495         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
496         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
497         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
498         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
499 };
500
/*
 * "cz" (presumably Carrizo) "a11" golden register settings.  Three u32
 * values per row: a register offset followed by two constants -- presumably
 * an AND mask and an OR value; confirm against the consumer, which is
 * outside this excerpt.
 */
501 static const u32 cz_golden_settings_a11[] =
502 {
503         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
504         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
505         mmGB_GPU_ID, 0x0000000f, 0x00000000,
506         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
507         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
508         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
509         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
510         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
511         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
512         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
513 };
514
/*
 * "cz" (presumably Carrizo) common golden settings.  Three u32 values per
 * row (register offset, then two constants -- presumably mask and value;
 * confirm against the consumer).  Every row uses 0xffffffff as its second
 * value.
 */
515 static const u32 cz_golden_common_all[] =
516 {
517         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
518         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
519         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
520         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
521         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
522         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
523         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
524         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
525 };
526
/*
 * "cz" (presumably Carrizo) clock-gating (MGCG/CGCG, per the name) init
 * table.  Three u32 values per row: register offset, then two constants --
 * presumably mask and value; confirm against the consumer, which is outside
 * this excerpt.
 *
 * Layout: RLC_CGTT_MGCG_OVERRIDE first, then the CGTT and related
 * clock-control registers, then per-CU CGTS registers for CU0..CU7, then the
 * trailing SM/CP/RLC rows.  GRBM_GFX_INDEX rows precede both register groups.
 * NOTE(review): row order is presumably significant -- do not reorder.
 */
527 static const u32 cz_mgcg_cgcg_init[] =
528 {
529         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
530         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
531         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
532         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
533         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
534         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
535         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
536         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
537         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
538         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
539         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
540         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
541         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
542         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
543         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
544         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
545         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
546         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
547         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
548         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
549         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
550         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
551         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
552         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
553         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
554         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
555         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
556         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
557         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
558         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
559         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
560         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
561         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
562         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
563         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
564         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
565         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
566         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
567         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
568         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
569         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
570         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
571         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
572         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
573         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
574         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
575         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
576         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
577         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
578         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
579         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
580         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
581         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
582         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
583         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
584         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
585         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
586         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
587         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
588         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
589         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
590         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
591         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
592         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
593         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
594         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
595         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
596         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
597         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
598         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
599         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
600         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
601         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
602         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
603         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
604 };
605
/*
 * Stoney "a11" golden register settings.  Three u32 values per row: a
 * register offset followed by two constants -- presumably an AND mask and an
 * OR value; confirm against the consumer, which is outside this excerpt.
 */
606 static const u32 stoney_golden_settings_a11[] =
607 {
608         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
609         mmGB_GPU_ID, 0x0000000f, 0x00000000,
610         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
611         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
612         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
613         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
614         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
615         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
616         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
617         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
618 };
619
/*
 * Stoney common golden settings.  Three u32 values per row (register offset,
 * then two constants -- presumably mask and value; confirm against the
 * consumer).  Every row uses 0xffffffff as its second value.
 */
620 static const u32 stoney_golden_common_all[] =
621 {
622         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
623         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
624         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
625         mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
626         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
627         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
628         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
629         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
630 };
631
632 static const u32 stoney_mgcg_cgcg_init[] =
633 {
634         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
635         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
636         mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
637         mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
638         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
639         mmATC_MISC_CG, 0xffffffff, 0x000c0200,
640 };
641
642 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
643 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
644 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
645 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
646 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
647 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
648
649 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
650 {
651         switch (adev->asic_type) {
652         case CHIP_TOPAZ:
653                 amdgpu_program_register_sequence(adev,
654                                                  iceland_mgcg_cgcg_init,
655                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
656                 amdgpu_program_register_sequence(adev,
657                                                  golden_settings_iceland_a11,
658                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
659                 amdgpu_program_register_sequence(adev,
660                                                  iceland_golden_common_all,
661                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
662                 break;
663         case CHIP_FIJI:
664                 amdgpu_program_register_sequence(adev,
665                                                  fiji_mgcg_cgcg_init,
666                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
667                 amdgpu_program_register_sequence(adev,
668                                                  golden_settings_fiji_a10,
669                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
670                 amdgpu_program_register_sequence(adev,
671                                                  fiji_golden_common_all,
672                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
673                 break;
674
675         case CHIP_TONGA:
676                 amdgpu_program_register_sequence(adev,
677                                                  tonga_mgcg_cgcg_init,
678                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
679                 amdgpu_program_register_sequence(adev,
680                                                  golden_settings_tonga_a11,
681                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
682                 amdgpu_program_register_sequence(adev,
683                                                  tonga_golden_common_all,
684                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
685                 break;
686         case CHIP_POLARIS11:
687                 amdgpu_program_register_sequence(adev,
688                                                  golden_settings_polaris11_a11,
689                                                  (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
690                 amdgpu_program_register_sequence(adev,
691                                                  polaris11_golden_common_all,
692                                                  (const u32)ARRAY_SIZE(polaris11_golden_common_all));
693                 break;
694         case CHIP_POLARIS10:
695                 amdgpu_program_register_sequence(adev,
696                                                  golden_settings_polaris10_a11,
697                                                  (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
698                 amdgpu_program_register_sequence(adev,
699                                                  polaris10_golden_common_all,
700                                                  (const u32)ARRAY_SIZE(polaris10_golden_common_all));
701                 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
702                 if (adev->pdev->revision == 0xc7) {
703                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
704                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
705                 }
706                 break;
707         case CHIP_CARRIZO:
708                 amdgpu_program_register_sequence(adev,
709                                                  cz_mgcg_cgcg_init,
710                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
711                 amdgpu_program_register_sequence(adev,
712                                                  cz_golden_settings_a11,
713                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
714                 amdgpu_program_register_sequence(adev,
715                                                  cz_golden_common_all,
716                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
717                 break;
718         case CHIP_STONEY:
719                 amdgpu_program_register_sequence(adev,
720                                                  stoney_mgcg_cgcg_init,
721                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
722                 amdgpu_program_register_sequence(adev,
723                                                  stoney_golden_settings_a11,
724                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
725                 amdgpu_program_register_sequence(adev,
726                                                  stoney_golden_common_all,
727                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
728                 break;
729         default:
730                 break;
731         }
732 }
733
734 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
735 {
736         int i;
737
738         adev->gfx.scratch.num_reg = 7;
739         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
740         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
741                 adev->gfx.scratch.free[i] = true;
742                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
743         }
744 }
745
746 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
747 {
748         struct amdgpu_device *adev = ring->adev;
749         uint32_t scratch;
750         uint32_t tmp = 0;
751         unsigned i;
752         int r;
753
754         r = amdgpu_gfx_scratch_get(adev, &scratch);
755         if (r) {
756                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
757                 return r;
758         }
759         WREG32(scratch, 0xCAFEDEAD);
760         r = amdgpu_ring_alloc(ring, 3);
761         if (r) {
762                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
763                           ring->idx, r);
764                 amdgpu_gfx_scratch_free(adev, scratch);
765                 return r;
766         }
767         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
768         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
769         amdgpu_ring_write(ring, 0xDEADBEEF);
770         amdgpu_ring_commit(ring);
771
772         for (i = 0; i < adev->usec_timeout; i++) {
773                 tmp = RREG32(scratch);
774                 if (tmp == 0xDEADBEEF)
775                         break;
776                 DRM_UDELAY(1);
777         }
778         if (i < adev->usec_timeout) {
779                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
780                          ring->idx, i);
781         } else {
782                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
783                           ring->idx, scratch, tmp);
784                 r = -EINVAL;
785         }
786         amdgpu_gfx_scratch_free(adev, scratch);
787         return r;
788 }
789
790 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
791 {
792         struct amdgpu_device *adev = ring->adev;
793         struct amdgpu_ib ib;
794         struct fence *f = NULL;
795         uint32_t scratch;
796         uint32_t tmp = 0;
797         unsigned i;
798         int r;
799
800         r = amdgpu_gfx_scratch_get(adev, &scratch);
801         if (r) {
802                 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
803                 return r;
804         }
805         WREG32(scratch, 0xCAFEDEAD);
806         memset(&ib, 0, sizeof(ib));
807         r = amdgpu_ib_get(adev, NULL, 256, &ib);
808         if (r) {
809                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
810                 goto err1;
811         }
812         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
813         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
814         ib.ptr[2] = 0xDEADBEEF;
815         ib.length_dw = 3;
816
817         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
818         if (r)
819                 goto err2;
820
821         r = fence_wait(f, false);
822         if (r) {
823                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
824                 goto err2;
825         }
826         for (i = 0; i < adev->usec_timeout; i++) {
827                 tmp = RREG32(scratch);
828                 if (tmp == 0xDEADBEEF)
829                         break;
830                 DRM_UDELAY(1);
831         }
832         if (i < adev->usec_timeout) {
833                 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
834                          ring->idx, i);
835                 goto err2;
836         } else {
837                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
838                           scratch, tmp);
839                 r = -EINVAL;
840         }
841 err2:
842         fence_put(f);
843         amdgpu_ib_free(adev, &ib, NULL);
844         fence_put(f);
845 err1:
846         amdgpu_gfx_scratch_free(adev, scratch);
847         return r;
848 }
849
850
851 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
852         release_firmware(adev->gfx.pfp_fw);
853         adev->gfx.pfp_fw = NULL;
854         release_firmware(adev->gfx.me_fw);
855         adev->gfx.me_fw = NULL;
856         release_firmware(adev->gfx.ce_fw);
857         adev->gfx.ce_fw = NULL;
858         release_firmware(adev->gfx.rlc_fw);
859         adev->gfx.rlc_fw = NULL;
860         release_firmware(adev->gfx.mec_fw);
861         adev->gfx.mec_fw = NULL;
862         if ((adev->asic_type != CHIP_STONEY) &&
863             (adev->asic_type != CHIP_TOPAZ))
864                 release_firmware(adev->gfx.mec2_fw);
865         adev->gfx.mec2_fw = NULL;
866
867         kfree(adev->gfx.rlc.register_list_format);
868 }
869
870 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
871 {
872         const char *chip_name;
873         char fw_name[30];
874         int err;
875         struct amdgpu_firmware_info *info = NULL;
876         const struct common_firmware_header *header = NULL;
877         const struct gfx_firmware_header_v1_0 *cp_hdr;
878         const struct rlc_firmware_header_v2_0 *rlc_hdr;
879         unsigned int *tmp = NULL, i;
880
881         DRM_DEBUG("\n");
882
883         switch (adev->asic_type) {
884         case CHIP_TOPAZ:
885                 chip_name = "topaz";
886                 break;
887         case CHIP_TONGA:
888                 chip_name = "tonga";
889                 break;
890         case CHIP_CARRIZO:
891                 chip_name = "carrizo";
892                 break;
893         case CHIP_FIJI:
894                 chip_name = "fiji";
895                 break;
896         case CHIP_POLARIS11:
897                 chip_name = "polaris11";
898                 break;
899         case CHIP_POLARIS10:
900                 chip_name = "polaris10";
901                 break;
902         case CHIP_STONEY:
903                 chip_name = "stoney";
904                 break;
905         default:
906                 BUG();
907         }
908
909         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
910         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
911         if (err)
912                 goto out;
913         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
914         if (err)
915                 goto out;
916         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
917         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
918         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
919
920         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
921         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
922         if (err)
923                 goto out;
924         err = amdgpu_ucode_validate(adev->gfx.me_fw);
925         if (err)
926                 goto out;
927         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
928         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
929         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
930
931         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
932         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
933         if (err)
934                 goto out;
935         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
936         if (err)
937                 goto out;
938         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
939         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
940         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
941
942         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
943         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
944         if (err)
945                 goto out;
946         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
947         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
948         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
949         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
950
951         adev->gfx.rlc.save_and_restore_offset =
952                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
953         adev->gfx.rlc.clear_state_descriptor_offset =
954                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
955         adev->gfx.rlc.avail_scratch_ram_locations =
956                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
957         adev->gfx.rlc.reg_restore_list_size =
958                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
959         adev->gfx.rlc.reg_list_format_start =
960                         le32_to_cpu(rlc_hdr->reg_list_format_start);
961         adev->gfx.rlc.reg_list_format_separate_start =
962                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
963         adev->gfx.rlc.starting_offsets_start =
964                         le32_to_cpu(rlc_hdr->starting_offsets_start);
965         adev->gfx.rlc.reg_list_format_size_bytes =
966                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
967         adev->gfx.rlc.reg_list_size_bytes =
968                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
969
970         adev->gfx.rlc.register_list_format =
971                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
972                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
973
974         if (!adev->gfx.rlc.register_list_format) {
975                 err = -ENOMEM;
976                 goto out;
977         }
978
979         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
980                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
981         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
982                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
983
984         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
985
986         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
987                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
988         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
989                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
990
991         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
992         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
993         if (err)
994                 goto out;
995         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
996         if (err)
997                 goto out;
998         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
999         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1000         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1001
1002         if ((adev->asic_type != CHIP_STONEY) &&
1003             (adev->asic_type != CHIP_TOPAZ)) {
1004                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1005                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1006                 if (!err) {
1007                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1008                         if (err)
1009                                 goto out;
1010                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1011                                 adev->gfx.mec2_fw->data;
1012                         adev->gfx.mec2_fw_version =
1013                                 le32_to_cpu(cp_hdr->header.ucode_version);
1014                         adev->gfx.mec2_feature_version =
1015                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1016                 } else {
1017                         err = 0;
1018                         adev->gfx.mec2_fw = NULL;
1019                 }
1020         }
1021
1022         if (adev->firmware.smu_load) {
1023                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1024                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1025                 info->fw = adev->gfx.pfp_fw;
1026                 header = (const struct common_firmware_header *)info->fw->data;
1027                 adev->firmware.fw_size +=
1028                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1029
1030                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1031                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1032                 info->fw = adev->gfx.me_fw;
1033                 header = (const struct common_firmware_header *)info->fw->data;
1034                 adev->firmware.fw_size +=
1035                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1036
1037                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1038                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1039                 info->fw = adev->gfx.ce_fw;
1040                 header = (const struct common_firmware_header *)info->fw->data;
1041                 adev->firmware.fw_size +=
1042                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1043
1044                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1045                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1046                 info->fw = adev->gfx.rlc_fw;
1047                 header = (const struct common_firmware_header *)info->fw->data;
1048                 adev->firmware.fw_size +=
1049                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1050
1051                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1052                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1053                 info->fw = adev->gfx.mec_fw;
1054                 header = (const struct common_firmware_header *)info->fw->data;
1055                 adev->firmware.fw_size +=
1056                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1057
1058                 if (adev->gfx.mec2_fw) {
1059                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1060                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1061                         info->fw = adev->gfx.mec2_fw;
1062                         header = (const struct common_firmware_header *)info->fw->data;
1063                         adev->firmware.fw_size +=
1064                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1065                 }
1066
1067         }
1068
1069 out:
1070         if (err) {
1071                 dev_err(adev->dev,
1072                         "gfx8: Failed to load firmware \"%s\"\n",
1073                         fw_name);
1074                 release_firmware(adev->gfx.pfp_fw);
1075                 adev->gfx.pfp_fw = NULL;
1076                 release_firmware(adev->gfx.me_fw);
1077                 adev->gfx.me_fw = NULL;
1078                 release_firmware(adev->gfx.ce_fw);
1079                 adev->gfx.ce_fw = NULL;
1080                 release_firmware(adev->gfx.rlc_fw);
1081                 adev->gfx.rlc_fw = NULL;
1082                 release_firmware(adev->gfx.mec_fw);
1083                 adev->gfx.mec_fw = NULL;
1084                 release_firmware(adev->gfx.mec2_fw);
1085                 adev->gfx.mec2_fw = NULL;
1086         }
1087         return err;
1088 }
1089
1090 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1091                                     volatile u32 *buffer)
1092 {
1093         u32 count = 0, i;
1094         const struct cs_section_def *sect = NULL;
1095         const struct cs_extent_def *ext = NULL;
1096
1097         if (adev->gfx.rlc.cs_data == NULL)
1098                 return;
1099         if (buffer == NULL)
1100                 return;
1101
1102         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1103         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1104
1105         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1106         buffer[count++] = cpu_to_le32(0x80000000);
1107         buffer[count++] = cpu_to_le32(0x80000000);
1108
1109         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1110                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1111                         if (sect->id == SECT_CONTEXT) {
1112                                 buffer[count++] =
1113                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1114                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1115                                                 PACKET3_SET_CONTEXT_REG_START);
1116                                 for (i = 0; i < ext->reg_count; i++)
1117                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1118                         } else {
1119                                 return;
1120                         }
1121                 }
1122         }
1123
1124         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1125         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1126                         PACKET3_SET_CONTEXT_REG_START);
1127         switch (adev->asic_type) {
1128         case CHIP_TONGA:
1129         case CHIP_POLARIS10:
1130                 buffer[count++] = cpu_to_le32(0x16000012);
1131                 buffer[count++] = cpu_to_le32(0x0000002A);
1132                 break;
1133         case CHIP_POLARIS11:
1134                 buffer[count++] = cpu_to_le32(0x16000012);
1135                 buffer[count++] = cpu_to_le32(0x00000000);
1136                 break;
1137         case CHIP_FIJI:
1138                 buffer[count++] = cpu_to_le32(0x3a00161a);
1139                 buffer[count++] = cpu_to_le32(0x0000002e);
1140                 break;
1141         case CHIP_TOPAZ:
1142         case CHIP_CARRIZO:
1143                 buffer[count++] = cpu_to_le32(0x00000002);
1144                 buffer[count++] = cpu_to_le32(0x00000000);
1145                 break;
1146         case CHIP_STONEY:
1147                 buffer[count++] = cpu_to_le32(0x00000000);
1148                 buffer[count++] = cpu_to_le32(0x00000000);
1149                 break;
1150         default:
1151                 buffer[count++] = cpu_to_le32(0x00000000);
1152                 buffer[count++] = cpu_to_le32(0x00000000);
1153                 break;
1154         }
1155
1156         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1157         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1158
1159         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1160         buffer[count++] = cpu_to_le32(0);
1161 }
1162
1163 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1164 {
1165         int r;
1166
1167         /* clear state block */
1168         if (adev->gfx.rlc.clear_state_obj) {
1169                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1170                 if (unlikely(r != 0))
1171                         dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
1172                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1173                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1174
1175                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1176                 adev->gfx.rlc.clear_state_obj = NULL;
1177         }
1178 }
1179
1180 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1181 {
1182         volatile u32 *dst_ptr;
1183         u32 dws;
1184         const struct cs_section_def *cs_data;
1185         int r;
1186
1187         adev->gfx.rlc.cs_data = vi_cs_data;
1188
1189         cs_data = adev->gfx.rlc.cs_data;
1190
1191         if (cs_data) {
1192                 /* clear state block */
1193                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1194
1195                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1196                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1197                                              AMDGPU_GEM_DOMAIN_VRAM,
1198                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1199                                              NULL, NULL,
1200                                              &adev->gfx.rlc.clear_state_obj);
1201                         if (r) {
1202                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1203                                 gfx_v8_0_rlc_fini(adev);
1204                                 return r;
1205                         }
1206                 }
1207                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1208                 if (unlikely(r != 0)) {
1209                         gfx_v8_0_rlc_fini(adev);
1210                         return r;
1211                 }
1212                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1213                                   &adev->gfx.rlc.clear_state_gpu_addr);
1214                 if (r) {
1215                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1216                         dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
1217                         gfx_v8_0_rlc_fini(adev);
1218                         return r;
1219                 }
1220
1221                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1222                 if (r) {
1223                         dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
1224                         gfx_v8_0_rlc_fini(adev);
1225                         return r;
1226                 }
1227                 /* set up the cs buffer */
1228                 dst_ptr = adev->gfx.rlc.cs_ptr;
1229                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1230                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1231                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1232         }
1233
1234         return 0;
1235 }
1236
1237 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1238 {
1239         int r;
1240
1241         if (adev->gfx.mec.hpd_eop_obj) {
1242                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1243                 if (unlikely(r != 0))
1244                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1245                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1246                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1247
1248                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1249                 adev->gfx.mec.hpd_eop_obj = NULL;
1250         }
1251 }
1252
1253 #define MEC_HPD_SIZE 2048
1254
1255 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1256 {
1257         int r;
1258         u32 *hpd;
1259
1260         /*
1261          * we assign only 1 pipe because all other pipes will
1262          * be handled by KFD
1263          */
1264         adev->gfx.mec.num_mec = 1;
1265         adev->gfx.mec.num_pipe = 1;
1266         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1267
1268         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1269                 r = amdgpu_bo_create(adev,
1270                                      adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1271                                      PAGE_SIZE, true,
1272                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1273                                      &adev->gfx.mec.hpd_eop_obj);
1274                 if (r) {
1275                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1276                         return r;
1277                 }
1278         }
1279
1280         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1281         if (unlikely(r != 0)) {
1282                 gfx_v8_0_mec_fini(adev);
1283                 return r;
1284         }
1285         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1286                           &adev->gfx.mec.hpd_eop_gpu_addr);
1287         if (r) {
1288                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1289                 gfx_v8_0_mec_fini(adev);
1290                 return r;
1291         }
1292         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1293         if (r) {
1294                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1295                 gfx_v8_0_mec_fini(adev);
1296                 return r;
1297         }
1298
1299         memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1300
1301         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1302         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1303
1304         return 0;
1305 }
1306
1307 static const u32 vgpr_init_compute_shader[] =
1308 {
1309         0x7e000209, 0x7e020208,
1310         0x7e040207, 0x7e060206,
1311         0x7e080205, 0x7e0a0204,
1312         0x7e0c0203, 0x7e0e0202,
1313         0x7e100201, 0x7e120200,
1314         0x7e140209, 0x7e160208,
1315         0x7e180207, 0x7e1a0206,
1316         0x7e1c0205, 0x7e1e0204,
1317         0x7e200203, 0x7e220202,
1318         0x7e240201, 0x7e260200,
1319         0x7e280209, 0x7e2a0208,
1320         0x7e2c0207, 0x7e2e0206,
1321         0x7e300205, 0x7e320204,
1322         0x7e340203, 0x7e360202,
1323         0x7e380201, 0x7e3a0200,
1324         0x7e3c0209, 0x7e3e0208,
1325         0x7e400207, 0x7e420206,
1326         0x7e440205, 0x7e460204,
1327         0x7e480203, 0x7e4a0202,
1328         0x7e4c0201, 0x7e4e0200,
1329         0x7e500209, 0x7e520208,
1330         0x7e540207, 0x7e560206,
1331         0x7e580205, 0x7e5a0204,
1332         0x7e5c0203, 0x7e5e0202,
1333         0x7e600201, 0x7e620200,
1334         0x7e640209, 0x7e660208,
1335         0x7e680207, 0x7e6a0206,
1336         0x7e6c0205, 0x7e6e0204,
1337         0x7e700203, 0x7e720202,
1338         0x7e740201, 0x7e760200,
1339         0x7e780209, 0x7e7a0208,
1340         0x7e7c0207, 0x7e7e0206,
1341         0xbf8a0000, 0xbf810000,
1342 };
1343
1344 static const u32 sgpr_init_compute_shader[] =
1345 {
1346         0xbe8a0100, 0xbe8c0102,
1347         0xbe8e0104, 0xbe900106,
1348         0xbe920108, 0xbe940100,
1349         0xbe960102, 0xbe980104,
1350         0xbe9a0106, 0xbe9c0108,
1351         0xbe9e0100, 0xbea00102,
1352         0xbea20104, 0xbea40106,
1353         0xbea60108, 0xbea80100,
1354         0xbeaa0102, 0xbeac0104,
1355         0xbeae0106, 0xbeb00108,
1356         0xbeb20100, 0xbeb40102,
1357         0xbeb60104, 0xbeb80106,
1358         0xbeba0108, 0xbebc0100,
1359         0xbebe0102, 0xbec00104,
1360         0xbec20106, 0xbec40108,
1361         0xbec60100, 0xbec80102,
1362         0xbee60004, 0xbee70005,
1363         0xbeea0006, 0xbeeb0007,
1364         0xbee80008, 0xbee90009,
1365         0xbefc0000, 0xbf8a0000,
1366         0xbf810000, 0x00000000,
1367 };
1368
1369 static const u32 vgpr_init_regs[] =
1370 {
1371         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1372         mmCOMPUTE_RESOURCE_LIMITS, 0,
1373         mmCOMPUTE_NUM_THREAD_X, 256*4,
1374         mmCOMPUTE_NUM_THREAD_Y, 1,
1375         mmCOMPUTE_NUM_THREAD_Z, 1,
1376         mmCOMPUTE_PGM_RSRC2, 20,
1377         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1378         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1379         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1380         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1381         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1382         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1383         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1384         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1385         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1386         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1387 };
1388
1389 static const u32 sgpr1_init_regs[] =
1390 {
1391         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1392         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1393         mmCOMPUTE_NUM_THREAD_X, 256*5,
1394         mmCOMPUTE_NUM_THREAD_Y, 1,
1395         mmCOMPUTE_NUM_THREAD_Z, 1,
1396         mmCOMPUTE_PGM_RSRC2, 20,
1397         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1398         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1399         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1400         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1401         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1402         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1403         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1404         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1405         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1406         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1407 };
1408
1409 static const u32 sgpr2_init_regs[] =
1410 {
1411         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1412         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1413         mmCOMPUTE_NUM_THREAD_X, 256*5,
1414         mmCOMPUTE_NUM_THREAD_Y, 1,
1415         mmCOMPUTE_NUM_THREAD_Z, 1,
1416         mmCOMPUTE_PGM_RSRC2, 20,
1417         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1418         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1419         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1420         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1421         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1422         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1423         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1424         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1425         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1426         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1427 };
1428
1429 static const u32 sec_ded_counter_registers[] =
1430 {
1431         mmCPC_EDC_ATC_CNT,
1432         mmCPC_EDC_SCRATCH_CNT,
1433         mmCPC_EDC_UCODE_CNT,
1434         mmCPF_EDC_ATC_CNT,
1435         mmCPF_EDC_ROQ_CNT,
1436         mmCPF_EDC_TAG_CNT,
1437         mmCPG_EDC_ATC_CNT,
1438         mmCPG_EDC_DMA_CNT,
1439         mmCPG_EDC_TAG_CNT,
1440         mmDC_EDC_CSINVOC_CNT,
1441         mmDC_EDC_RESTORE_CNT,
1442         mmDC_EDC_STATE_CNT,
1443         mmGDS_EDC_CNT,
1444         mmGDS_EDC_GRBM_CNT,
1445         mmGDS_EDC_OA_DED,
1446         mmSPI_EDC_CNT,
1447         mmSQC_ATC_EDC_GATCL1_CNT,
1448         mmSQC_EDC_CNT,
1449         mmSQ_EDC_DED_CNT,
1450         mmSQ_EDC_INFO,
1451         mmSQ_EDC_SEC_CNT,
1452         mmTCC_EDC_CNT,
1453         mmTCP_ATC_EDC_GATCL1_CNT,
1454         mmTCP_EDC_CNT,
1455         mmTD_EDC_CNT
1456 };
1457
1458 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1459 {
1460         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1461         struct amdgpu_ib ib;
1462         struct fence *f = NULL;
1463         int r, i;
1464         u32 tmp;
1465         unsigned total_size, vgpr_offset, sgpr_offset;
1466         u64 gpu_addr;
1467
1468         /* only supported on CZ */
1469         if (adev->asic_type != CHIP_CARRIZO)
1470                 return 0;
1471
1472         /* bail if the compute ring is not ready */
1473         if (!ring->ready)
1474                 return 0;
1475
1476         tmp = RREG32(mmGB_EDC_MODE);
1477         WREG32(mmGB_EDC_MODE, 0);
1478
1479         total_size =
1480                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1481         total_size +=
1482                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1483         total_size +=
1484                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1485         total_size = ALIGN(total_size, 256);
1486         vgpr_offset = total_size;
1487         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1488         sgpr_offset = total_size;
1489         total_size += sizeof(sgpr_init_compute_shader);
1490
1491         /* allocate an indirect buffer to put the commands in */
1492         memset(&ib, 0, sizeof(ib));
1493         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1494         if (r) {
1495                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1496                 return r;
1497         }
1498
1499         /* load the compute shaders */
1500         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1501                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1502
1503         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1504                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1505
1506         /* init the ib length to 0 */
1507         ib.length_dw = 0;
1508
1509         /* VGPR */
1510         /* write the register state for the compute dispatch */
1511         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1512                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1513                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1514                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1515         }
1516         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1517         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1518         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1519         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1520         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1521         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1522
1523         /* write dispatch packet */
1524         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1525         ib.ptr[ib.length_dw++] = 8; /* x */
1526         ib.ptr[ib.length_dw++] = 1; /* y */
1527         ib.ptr[ib.length_dw++] = 1; /* z */
1528         ib.ptr[ib.length_dw++] =
1529                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1530
1531         /* write CS partial flush packet */
1532         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1533         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1534
1535         /* SGPR1 */
1536         /* write the register state for the compute dispatch */
1537         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1538                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1539                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1540                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1541         }
1542         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1543         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1544         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1545         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1546         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1547         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1548
1549         /* write dispatch packet */
1550         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1551         ib.ptr[ib.length_dw++] = 8; /* x */
1552         ib.ptr[ib.length_dw++] = 1; /* y */
1553         ib.ptr[ib.length_dw++] = 1; /* z */
1554         ib.ptr[ib.length_dw++] =
1555                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1556
1557         /* write CS partial flush packet */
1558         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1559         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1560
1561         /* SGPR2 */
1562         /* write the register state for the compute dispatch */
1563         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1564                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1565                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1566                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1567         }
1568         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1569         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1570         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1571         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1572         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1573         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1574
1575         /* write dispatch packet */
1576         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1577         ib.ptr[ib.length_dw++] = 8; /* x */
1578         ib.ptr[ib.length_dw++] = 1; /* y */
1579         ib.ptr[ib.length_dw++] = 1; /* z */
1580         ib.ptr[ib.length_dw++] =
1581                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1582
1583         /* write CS partial flush packet */
1584         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1585         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1586
1587         /* shedule the ib on the ring */
1588         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1589         if (r) {
1590                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1591                 goto fail;
1592         }
1593
1594         /* wait for the GPU to finish processing the IB */
1595         r = fence_wait(f, false);
1596         if (r) {
1597                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1598                 goto fail;
1599         }
1600
1601         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1602         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1603         WREG32(mmGB_EDC_MODE, tmp);
1604
1605         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1606         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1607         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1608
1609
1610         /* read back registers to clear the counters */
1611         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1612                 RREG32(sec_ded_counter_registers[i]);
1613
1614 fail:
1615         fence_put(f);
1616         amdgpu_ib_free(adev, &ib, NULL);
1617         fence_put(f);
1618
1619         return r;
1620 }
1621
1622 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1623 {
1624         u32 gb_addr_config;
1625         u32 mc_shared_chmap, mc_arb_ramcfg;
1626         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1627         u32 tmp;
1628         int ret;
1629
1630         switch (adev->asic_type) {
1631         case CHIP_TOPAZ:
1632                 adev->gfx.config.max_shader_engines = 1;
1633                 adev->gfx.config.max_tile_pipes = 2;
1634                 adev->gfx.config.max_cu_per_sh = 6;
1635                 adev->gfx.config.max_sh_per_se = 1;
1636                 adev->gfx.config.max_backends_per_se = 2;
1637                 adev->gfx.config.max_texture_channel_caches = 2;
1638                 adev->gfx.config.max_gprs = 256;
1639                 adev->gfx.config.max_gs_threads = 32;
1640                 adev->gfx.config.max_hw_contexts = 8;
1641
1642                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1643                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1644                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1645                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1646                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1647                 break;
1648         case CHIP_FIJI:
1649                 adev->gfx.config.max_shader_engines = 4;
1650                 adev->gfx.config.max_tile_pipes = 16;
1651                 adev->gfx.config.max_cu_per_sh = 16;
1652                 adev->gfx.config.max_sh_per_se = 1;
1653                 adev->gfx.config.max_backends_per_se = 4;
1654                 adev->gfx.config.max_texture_channel_caches = 16;
1655                 adev->gfx.config.max_gprs = 256;
1656                 adev->gfx.config.max_gs_threads = 32;
1657                 adev->gfx.config.max_hw_contexts = 8;
1658
1659                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1660                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1661                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1662                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1663                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1664                 break;
1665         case CHIP_POLARIS11:
1666                 ret = amdgpu_atombios_get_gfx_info(adev);
1667                 if (ret)
1668                         return ret;
1669                 adev->gfx.config.max_gprs = 256;
1670                 adev->gfx.config.max_gs_threads = 32;
1671                 adev->gfx.config.max_hw_contexts = 8;
1672
1673                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1674                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1675                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1676                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1677                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1678                 break;
1679         case CHIP_POLARIS10:
1680                 ret = amdgpu_atombios_get_gfx_info(adev);
1681                 if (ret)
1682                         return ret;
1683                 adev->gfx.config.max_gprs = 256;
1684                 adev->gfx.config.max_gs_threads = 32;
1685                 adev->gfx.config.max_hw_contexts = 8;
1686
1687                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1688                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1689                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1690                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1691                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1692                 break;
1693         case CHIP_TONGA:
1694                 adev->gfx.config.max_shader_engines = 4;
1695                 adev->gfx.config.max_tile_pipes = 8;
1696                 adev->gfx.config.max_cu_per_sh = 8;
1697                 adev->gfx.config.max_sh_per_se = 1;
1698                 adev->gfx.config.max_backends_per_se = 2;
1699                 adev->gfx.config.max_texture_channel_caches = 8;
1700                 adev->gfx.config.max_gprs = 256;
1701                 adev->gfx.config.max_gs_threads = 32;
1702                 adev->gfx.config.max_hw_contexts = 8;
1703
1704                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1705                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1706                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1707                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1708                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1709                 break;
1710         case CHIP_CARRIZO:
1711                 adev->gfx.config.max_shader_engines = 1;
1712                 adev->gfx.config.max_tile_pipes = 2;
1713                 adev->gfx.config.max_sh_per_se = 1;
1714                 adev->gfx.config.max_backends_per_se = 2;
1715
1716                 switch (adev->pdev->revision) {
1717                 case 0xc4:
1718                 case 0x84:
1719                 case 0xc8:
1720                 case 0xcc:
1721                 case 0xe1:
1722                 case 0xe3:
1723                         /* B10 */
1724                         adev->gfx.config.max_cu_per_sh = 8;
1725                         break;
1726                 case 0xc5:
1727                 case 0x81:
1728                 case 0x85:
1729                 case 0xc9:
1730                 case 0xcd:
1731                 case 0xe2:
1732                 case 0xe4:
1733                         /* B8 */
1734                         adev->gfx.config.max_cu_per_sh = 6;
1735                         break;
1736                 case 0xc6:
1737                 case 0xca:
1738                 case 0xce:
1739                 case 0x88:
1740                         /* B6 */
1741                         adev->gfx.config.max_cu_per_sh = 6;
1742                         break;
1743                 case 0xc7:
1744                 case 0x87:
1745                 case 0xcb:
1746                 case 0xe5:
1747                 case 0x89:
1748                 default:
1749                         /* B4 */
1750                         adev->gfx.config.max_cu_per_sh = 4;
1751                         break;
1752                 }
1753
1754                 adev->gfx.config.max_texture_channel_caches = 2;
1755                 adev->gfx.config.max_gprs = 256;
1756                 adev->gfx.config.max_gs_threads = 32;
1757                 adev->gfx.config.max_hw_contexts = 8;
1758
1759                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1760                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1761                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1762                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1763                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1764                 break;
1765         case CHIP_STONEY:
1766                 adev->gfx.config.max_shader_engines = 1;
1767                 adev->gfx.config.max_tile_pipes = 2;
1768                 adev->gfx.config.max_sh_per_se = 1;
1769                 adev->gfx.config.max_backends_per_se = 1;
1770
1771                 switch (adev->pdev->revision) {
1772                 case 0xc0:
1773                 case 0xc1:
1774                 case 0xc2:
1775                 case 0xc4:
1776                 case 0xc8:
1777                 case 0xc9:
1778                         adev->gfx.config.max_cu_per_sh = 3;
1779                         break;
1780                 case 0xd0:
1781                 case 0xd1:
1782                 case 0xd2:
1783                 default:
1784                         adev->gfx.config.max_cu_per_sh = 2;
1785                         break;
1786                 }
1787
1788                 adev->gfx.config.max_texture_channel_caches = 2;
1789                 adev->gfx.config.max_gprs = 256;
1790                 adev->gfx.config.max_gs_threads = 16;
1791                 adev->gfx.config.max_hw_contexts = 8;
1792
1793                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1794                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1795                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1796                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1797                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1798                 break;
1799         default:
1800                 adev->gfx.config.max_shader_engines = 2;
1801                 adev->gfx.config.max_tile_pipes = 4;
1802                 adev->gfx.config.max_cu_per_sh = 2;
1803                 adev->gfx.config.max_sh_per_se = 1;
1804                 adev->gfx.config.max_backends_per_se = 2;
1805                 adev->gfx.config.max_texture_channel_caches = 4;
1806                 adev->gfx.config.max_gprs = 256;
1807                 adev->gfx.config.max_gs_threads = 32;
1808                 adev->gfx.config.max_hw_contexts = 8;
1809
1810                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1811                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1812                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1813                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1814                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1815                 break;
1816         }
1817
1818         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1819         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1820         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1821
1822         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1823         adev->gfx.config.mem_max_burst_length_bytes = 256;
1824         if (adev->flags & AMD_IS_APU) {
1825                 /* Get memory bank mapping mode. */
1826                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1827                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1828                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1829
1830                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1831                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1832                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1833
1834                 /* Validate settings in case only one DIMM installed. */
1835                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1836                         dimm00_addr_map = 0;
1837                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1838                         dimm01_addr_map = 0;
1839                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1840                         dimm10_addr_map = 0;
1841                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1842                         dimm11_addr_map = 0;
1843
1844                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1845                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1846                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1847                         adev->gfx.config.mem_row_size_in_kb = 2;
1848                 else
1849                         adev->gfx.config.mem_row_size_in_kb = 1;
1850         } else {
1851                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1852                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1853                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1854                         adev->gfx.config.mem_row_size_in_kb = 4;
1855         }
1856
1857         adev->gfx.config.shader_engine_tile_size = 32;
1858         adev->gfx.config.num_gpus = 1;
1859         adev->gfx.config.multi_gpu_tile_size = 64;
1860
1861         /* fix up row size */
1862         switch (adev->gfx.config.mem_row_size_in_kb) {
1863         case 1:
1864         default:
1865                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1866                 break;
1867         case 2:
1868                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1869                 break;
1870         case 4:
1871                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1872                 break;
1873         }
1874         adev->gfx.config.gb_addr_config = gb_addr_config;
1875
1876         return 0;
1877 }
1878
1879 static int gfx_v8_0_sw_init(void *handle)
1880 {
1881         int i, r;
1882         struct amdgpu_ring *ring;
1883         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1884
1885         /* EOP Event */
1886         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1887         if (r)
1888                 return r;
1889
1890         /* Privileged reg */
1891         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1892         if (r)
1893                 return r;
1894
1895         /* Privileged inst */
1896         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1897         if (r)
1898                 return r;
1899
1900         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1901
1902         gfx_v8_0_scratch_init(adev);
1903
1904         r = gfx_v8_0_init_microcode(adev);
1905         if (r) {
1906                 DRM_ERROR("Failed to load gfx firmware!\n");
1907                 return r;
1908         }
1909
1910         r = gfx_v8_0_rlc_init(adev);
1911         if (r) {
1912                 DRM_ERROR("Failed to init rlc BOs!\n");
1913                 return r;
1914         }
1915
1916         r = gfx_v8_0_mec_init(adev);
1917         if (r) {
1918                 DRM_ERROR("Failed to init MEC BOs!\n");
1919                 return r;
1920         }
1921
1922         /* set up the gfx ring */
1923         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1924                 ring = &adev->gfx.gfx_ring[i];
1925                 ring->ring_obj = NULL;
1926                 sprintf(ring->name, "gfx");
1927                 /* no gfx doorbells on iceland */
1928                 if (adev->asic_type != CHIP_TOPAZ) {
1929                         ring->use_doorbell = true;
1930                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1931                 }
1932
1933                 r = amdgpu_ring_init(adev, ring, 1024,
1934                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1935                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1936                                      AMDGPU_RING_TYPE_GFX);
1937                 if (r)
1938                         return r;
1939         }
1940
1941         /* set up the compute queues */
1942         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1943                 unsigned irq_type;
1944
1945                 /* max 32 queues per MEC */
1946                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1947                         DRM_ERROR("Too many (%d) compute rings!\n", i);
1948                         break;
1949                 }
1950                 ring = &adev->gfx.compute_ring[i];
1951                 ring->ring_obj = NULL;
1952                 ring->use_doorbell = true;
1953                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1954                 ring->me = 1; /* first MEC */
1955                 ring->pipe = i / 8;
1956                 ring->queue = i % 8;
1957                 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1958                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1959                 /* type-2 packets are deprecated on MEC, use type-3 instead */
1960                 r = amdgpu_ring_init(adev, ring, 1024,
1961                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1962                                      &adev->gfx.eop_irq, irq_type,
1963                                      AMDGPU_RING_TYPE_COMPUTE);
1964                 if (r)
1965                         return r;
1966         }
1967
1968         /* reserve GDS, GWS and OA resource for gfx */
1969         r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1970                         PAGE_SIZE, true,
1971                         AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
1972                         NULL, &adev->gds.gds_gfx_bo);
1973         if (r)
1974                 return r;
1975
1976         r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1977                 PAGE_SIZE, true,
1978                 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
1979                 NULL, &adev->gds.gws_gfx_bo);
1980         if (r)
1981                 return r;
1982
1983         r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1984                         PAGE_SIZE, true,
1985                         AMDGPU_GEM_DOMAIN_OA, 0, NULL,
1986                         NULL, &adev->gds.oa_gfx_bo);
1987         if (r)
1988                 return r;
1989
1990         adev->gfx.ce_ram_size = 0x8000;
1991
1992         r = gfx_v8_0_gpu_early_init(adev);
1993         if (r)
1994                 return r;
1995
1996         return 0;
1997 }
1998
1999 static int gfx_v8_0_sw_fini(void *handle)
2000 {
2001         int i;
2002         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2003
2004         amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
2005         amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
2006         amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
2007
2008         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2009                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2010         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2011                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2012
2013         gfx_v8_0_mec_fini(adev);
2014
2015         gfx_v8_0_rlc_fini(adev);
2016
2017         gfx_v8_0_free_microcode(adev);
2018
2019         return 0;
2020 }
2021
2022 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2023 {
2024         uint32_t *modearray, *mod2array;
2025         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2026         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2027         u32 reg_offset;
2028
2029         modearray = adev->gfx.config.tile_mode_array;
2030         mod2array = adev->gfx.config.macrotile_mode_array;
2031
2032         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2033                 modearray[reg_offset] = 0;
2034
2035         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2036                 mod2array[reg_offset] = 0;
2037
2038         switch (adev->asic_type) {
2039         case CHIP_TOPAZ:
2040                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2041                                 PIPE_CONFIG(ADDR_SURF_P2) |
2042                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2043                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2044                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2045                                 PIPE_CONFIG(ADDR_SURF_P2) |
2046                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2047                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2048                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2049                                 PIPE_CONFIG(ADDR_SURF_P2) |
2050                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2051                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2052                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2053                                 PIPE_CONFIG(ADDR_SURF_P2) |
2054                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2055                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2056                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2057                                 PIPE_CONFIG(ADDR_SURF_P2) |
2058                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2059                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2060                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2061                                 PIPE_CONFIG(ADDR_SURF_P2) |
2062                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2063                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2064                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2065                                 PIPE_CONFIG(ADDR_SURF_P2) |
2066                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2067                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2068                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2069                                 PIPE_CONFIG(ADDR_SURF_P2));
2070                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2071                                 PIPE_CONFIG(ADDR_SURF_P2) |
2072                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2073                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2074                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2075                                  PIPE_CONFIG(ADDR_SURF_P2) |
2076                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2077                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2078                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2079                                  PIPE_CONFIG(ADDR_SURF_P2) |
2080                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2081                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2082                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2083                                  PIPE_CONFIG(ADDR_SURF_P2) |
2084                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2085                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2086                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2087                                  PIPE_CONFIG(ADDR_SURF_P2) |
2088                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2089                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2090                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2091                                  PIPE_CONFIG(ADDR_SURF_P2) |
2092                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2093                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2094                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2095                                  PIPE_CONFIG(ADDR_SURF_P2) |
2096                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2097                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2098                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2099                                  PIPE_CONFIG(ADDR_SURF_P2) |
2100                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2101                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2102                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2103                                  PIPE_CONFIG(ADDR_SURF_P2) |
2104                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2105                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2106                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2107                                  PIPE_CONFIG(ADDR_SURF_P2) |
2108                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2109                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2110                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2111                                  PIPE_CONFIG(ADDR_SURF_P2) |
2112                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2113                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2114                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2115                                  PIPE_CONFIG(ADDR_SURF_P2) |
2116                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2117                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2118                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2119                                  PIPE_CONFIG(ADDR_SURF_P2) |
2120                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2121                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2122                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2123                                  PIPE_CONFIG(ADDR_SURF_P2) |
2124                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2125                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2126                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2127                                  PIPE_CONFIG(ADDR_SURF_P2) |
2128                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2129                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2130                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2131                                  PIPE_CONFIG(ADDR_SURF_P2) |
2132                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2133                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2134                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135                                  PIPE_CONFIG(ADDR_SURF_P2) |
2136                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2137                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2138                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2139                                  PIPE_CONFIG(ADDR_SURF_P2) |
2140                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2141                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2142
2143                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2144                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2145                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2146                                 NUM_BANKS(ADDR_SURF_8_BANK));
2147                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2148                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2149                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2150                                 NUM_BANKS(ADDR_SURF_8_BANK));
2151                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2152                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2153                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2154                                 NUM_BANKS(ADDR_SURF_8_BANK));
2155                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2156                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2157                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2158                                 NUM_BANKS(ADDR_SURF_8_BANK));
2159                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2160                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2161                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2162                                 NUM_BANKS(ADDR_SURF_8_BANK));
2163                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2164                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2165                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2166                                 NUM_BANKS(ADDR_SURF_8_BANK));
2167                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2168                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2169                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2170                                 NUM_BANKS(ADDR_SURF_8_BANK));
2171                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2172                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2173                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2174                                 NUM_BANKS(ADDR_SURF_16_BANK));
2175                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2176                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2177                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2178                                 NUM_BANKS(ADDR_SURF_16_BANK));
2179                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2180                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2181                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2182                                  NUM_BANKS(ADDR_SURF_16_BANK));
2183                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2184                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2185                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2186                                  NUM_BANKS(ADDR_SURF_16_BANK));
2187                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2188                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2189                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2190                                  NUM_BANKS(ADDR_SURF_16_BANK));
2191                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2192                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2193                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2194                                  NUM_BANKS(ADDR_SURF_16_BANK));
2195                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2196                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2197                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2198                                  NUM_BANKS(ADDR_SURF_8_BANK));
2199
2200                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2201                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2202                             reg_offset != 23)
2203                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2204
2205                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2206                         if (reg_offset != 7)
2207                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2208
2209                 break;
2210         case CHIP_FIJI:
2211                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2212                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2213                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2214                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2215                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2216                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2217                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2218                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2219                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2220                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2221                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2222                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2223                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2224                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2225                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2226                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2227                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2228                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2229                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2230                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2231                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2232                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2233                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2234                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2235                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2236                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2237                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2238                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2239                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2240                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2241                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2242                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2243                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2244                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2245                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2246                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2247                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2248                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2249                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2250                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2251                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2252                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2253                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2254                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2255                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2256                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2257                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2258                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2259                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2260                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2261                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2262                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2263                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2264                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2265                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2266                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2267                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2268                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2269                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2270                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2271                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2272                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2273                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2274                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2275                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2276                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2277                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2278                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2279                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2280                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2281                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2282                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2283                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2284                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2285                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2286                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2287                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2288                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2289                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2290                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2291                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2292                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2293                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2294                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2295                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2296                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2297                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2298                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2299                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2300                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2301                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2302                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2303                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2304                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2305                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2306                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2307                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2308                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2309                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2310                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2311                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2312                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2313                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2314                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2315                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2316                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2317                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2318                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2319                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2320                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2321                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2322                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2323                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2325                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2326                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2327                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2328                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2329                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2330                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2331                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2332                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2333
2334                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2336                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2337                                 NUM_BANKS(ADDR_SURF_8_BANK));
2338                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2339                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2340                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2341                                 NUM_BANKS(ADDR_SURF_8_BANK));
2342                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2343                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2344                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2345                                 NUM_BANKS(ADDR_SURF_8_BANK));
2346                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2347                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2348                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2349                                 NUM_BANKS(ADDR_SURF_8_BANK));
2350                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2351                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2352                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2353                                 NUM_BANKS(ADDR_SURF_8_BANK));
2354                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2355                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2356                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2357                                 NUM_BANKS(ADDR_SURF_8_BANK));
2358                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2359                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2360                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2361                                 NUM_BANKS(ADDR_SURF_8_BANK));
2362                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2363                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2364                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2365                                 NUM_BANKS(ADDR_SURF_8_BANK));
2366                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2367                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2368                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2369                                 NUM_BANKS(ADDR_SURF_8_BANK));
2370                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2371                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2372                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2373                                  NUM_BANKS(ADDR_SURF_8_BANK));
2374                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2375                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2376                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2377                                  NUM_BANKS(ADDR_SURF_8_BANK));
2378                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2379                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2380                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2381                                  NUM_BANKS(ADDR_SURF_8_BANK));
2382                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2383                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2384                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2385                                  NUM_BANKS(ADDR_SURF_8_BANK));
2386                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2387                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2388                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2389                                  NUM_BANKS(ADDR_SURF_4_BANK));
2390
2391                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2392                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2393
2394                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2395                         if (reg_offset != 7)
2396                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2397
2398                 break;
2399         case CHIP_TONGA:
2400                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2401                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2402                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2403                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2404                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2405                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2406                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2407                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2408                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2410                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2411                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2412                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2413                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2414                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2415                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2416                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2418                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2419                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2420                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2421                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2422                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2423                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2424                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2426                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2427                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2428                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2429                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2430                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2431                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2432                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2433                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2434                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2435                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2436                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2437                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2440                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2441                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2442                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2443                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2444                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2445                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2446                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2447                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2448                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2449                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2450                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2451                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2452                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2453                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2454                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2455                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2456                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2457                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2458                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2459                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2460                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2462                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2463                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2464                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2465                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2466                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2467                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2468                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2469                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2470                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2471                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2472                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2473                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2474                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2475                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2476                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2477                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2478                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2479                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2480                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2481                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2482                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2483                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2484                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2485                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2486                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2487                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2488                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2489                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2490                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2491                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2492                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2493                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2494                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2495                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2496                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2497                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2498                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2499                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2500                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2501                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2502                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2503                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2504                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2505                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2506                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2507                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2508                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2509                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2510                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2511                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2512                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2513                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2514                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2515                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2517                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2518                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2519                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2520                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2521                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2522
2523                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2525                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2526                                 NUM_BANKS(ADDR_SURF_16_BANK));
2527                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2529                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2530                                 NUM_BANKS(ADDR_SURF_16_BANK));
2531                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2532                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2533                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2534                                 NUM_BANKS(ADDR_SURF_16_BANK));
2535                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2536                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2537                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2538                                 NUM_BANKS(ADDR_SURF_16_BANK));
2539                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2541                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2542                                 NUM_BANKS(ADDR_SURF_16_BANK));
2543                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2544                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2545                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2546                                 NUM_BANKS(ADDR_SURF_16_BANK));
2547                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2549                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2550                                 NUM_BANKS(ADDR_SURF_16_BANK));
2551                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2553                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2554                                 NUM_BANKS(ADDR_SURF_16_BANK));
2555                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2557                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2558                                 NUM_BANKS(ADDR_SURF_16_BANK));
2559                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2561                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2562                                  NUM_BANKS(ADDR_SURF_16_BANK));
2563                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2565                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2566                                  NUM_BANKS(ADDR_SURF_16_BANK));
2567                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2568                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2569                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2570                                  NUM_BANKS(ADDR_SURF_8_BANK));
2571                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2572                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2573                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2574                                  NUM_BANKS(ADDR_SURF_4_BANK));
2575                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2577                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2578                                  NUM_BANKS(ADDR_SURF_4_BANK));
2579
2580                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2581                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2582
2583                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2584                         if (reg_offset != 7)
2585                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2586
2587                 break;
2588         case CHIP_POLARIS11:
2589                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2590                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2591                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2592                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2593                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2594                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2595                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2596                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2597                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2598                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2599                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2600                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2601                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2602                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2603                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2604                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2605                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2606                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2607                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2608                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2609                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2610                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2611                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2612                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2613                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2614                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2615                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2616                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2617                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2618                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2619                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2620                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2621                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2622                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2623                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2624                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2625                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2626                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2627                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2628                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2629                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2630                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2631                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2632                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2633                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2634                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2635                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2636                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2637                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2638                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2639                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2640                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2641                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2642                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2643                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2644                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2645                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2646                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2647                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2648                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2649                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2650                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2651                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2652                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2653                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2654                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2655                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2656                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2657                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2658                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2659                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2660                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2661                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2662                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2663                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2664                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2665                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2666                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2667                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2668                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2669                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2670                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2671                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2672                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2673                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2674                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2675                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2676                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2677                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2678                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2679                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2680                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2681                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2682                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2683                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2684                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2685                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2686                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2687                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2688                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2689                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2690                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2691                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2692                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2693                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2694                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2695                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2696                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2697                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2698                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2699                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2701                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2702                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2704                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2706                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2707                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2708                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2710                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2711
2712                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2713                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2714                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2715                                 NUM_BANKS(ADDR_SURF_16_BANK));
2716
2717                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2718                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2719                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2720                                 NUM_BANKS(ADDR_SURF_16_BANK));
2721
2722                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2723                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2724                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2725                                 NUM_BANKS(ADDR_SURF_16_BANK));
2726
2727                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2729                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2730                                 NUM_BANKS(ADDR_SURF_16_BANK));
2731
2732                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2734                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2735                                 NUM_BANKS(ADDR_SURF_16_BANK));
2736
2737                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2738                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2739                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2740                                 NUM_BANKS(ADDR_SURF_16_BANK));
2741
2742                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2743                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2744                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2745                                 NUM_BANKS(ADDR_SURF_16_BANK));
2746
2747                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2748                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2749                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2750                                 NUM_BANKS(ADDR_SURF_16_BANK));
2751
2752                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2753                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2754                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2755                                 NUM_BANKS(ADDR_SURF_16_BANK));
2756
2757                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2758                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2759                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2760                                 NUM_BANKS(ADDR_SURF_16_BANK));
2761
2762                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2764                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2765                                 NUM_BANKS(ADDR_SURF_16_BANK));
2766
2767                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2769                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2770                                 NUM_BANKS(ADDR_SURF_16_BANK));
2771
2772                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2773                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2774                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2775                                 NUM_BANKS(ADDR_SURF_8_BANK));
2776
2777                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2778                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2779                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2780                                 NUM_BANKS(ADDR_SURF_4_BANK));
2781
2782                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2783                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2784
2785                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2786                         if (reg_offset != 7)
2787                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2788
2789                 break;
2790         case CHIP_POLARIS10:
2791                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2792                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2793                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2794                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2795                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2796                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2797                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2798                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2799                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2800                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2801                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2802                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2803                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2805                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2806                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2807                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2808                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2809                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2810                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2811                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2812                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2813                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2814                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2815                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2816                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2817                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2818                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2819                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2820                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2822                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2823                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2824                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2825                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2826                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2827                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2828                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2829                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2830                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2831                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2832                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2833                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2834                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2835                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2836                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2837                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2838                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2839                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2840                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2841                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2842                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2843                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2844                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2845                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2847                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2848                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2849                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2850                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2851                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2852                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2853                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2854                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2855                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2856                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2857                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2858                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2860                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2861                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2862                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2863                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2864                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2865                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2866                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2867                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2868                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2869                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2870                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2871                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2872                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2873                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2874                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2875                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2876                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2877                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2878                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2879                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2880                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2881                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2882                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2883                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2884                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2885                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2886                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2887                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2888                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2889                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2890                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2891                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2892                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2893                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2894                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2895                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2896                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2897                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2898                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2899                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2900                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2901                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2903                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2904                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2905                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2906                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2908                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2909                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2910                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2911                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2912                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2913
2914                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2915                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2916                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2917                                 NUM_BANKS(ADDR_SURF_16_BANK));
2918
2919                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2920                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2921                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2922                                 NUM_BANKS(ADDR_SURF_16_BANK));
2923
2924                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2925                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2926                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2927                                 NUM_BANKS(ADDR_SURF_16_BANK));
2928
2929                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2930                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2931                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2932                                 NUM_BANKS(ADDR_SURF_16_BANK));
2933
2934                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2935                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2936                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2937                                 NUM_BANKS(ADDR_SURF_16_BANK));
2938
2939                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2940                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2941                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2942                                 NUM_BANKS(ADDR_SURF_16_BANK));
2943
2944                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2945                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2946                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2947                                 NUM_BANKS(ADDR_SURF_16_BANK));
2948
2949                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2950                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2951                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2952                                 NUM_BANKS(ADDR_SURF_16_BANK));
2953
2954                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2955                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2956                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2957                                 NUM_BANKS(ADDR_SURF_16_BANK));
2958
2959                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2960                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2961                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2962                                 NUM_BANKS(ADDR_SURF_16_BANK));
2963
2964                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2965                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2966                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2967                                 NUM_BANKS(ADDR_SURF_16_BANK));
2968
2969                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2970                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2971                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2972                                 NUM_BANKS(ADDR_SURF_8_BANK));
2973
2974                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2975                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2976                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2977                                 NUM_BANKS(ADDR_SURF_4_BANK));
2978
2979                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2980                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2981                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2982                                 NUM_BANKS(ADDR_SURF_4_BANK));
2983
2984                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2985                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2986
2987                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2988                         if (reg_offset != 7)
2989                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2990
2991                 break;
2992         case CHIP_STONEY:
2993                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2994                                 PIPE_CONFIG(ADDR_SURF_P2) |
2995                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2996                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2997                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2998                                 PIPE_CONFIG(ADDR_SURF_P2) |
2999                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3000                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3001                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3002                                 PIPE_CONFIG(ADDR_SURF_P2) |
3003                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3004                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3005                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3006                                 PIPE_CONFIG(ADDR_SURF_P2) |
3007                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3008                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3009                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3010                                 PIPE_CONFIG(ADDR_SURF_P2) |
3011                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3012                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3013                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3014                                 PIPE_CONFIG(ADDR_SURF_P2) |
3015                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3016                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3017                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3018                                 PIPE_CONFIG(ADDR_SURF_P2) |
3019                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3020                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3021                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3022                                 PIPE_CONFIG(ADDR_SURF_P2));
3023                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3024                                 PIPE_CONFIG(ADDR_SURF_P2) |
3025                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3026                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3027                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3028                                  PIPE_CONFIG(ADDR_SURF_P2) |
3029                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3030                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3031                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3032                                  PIPE_CONFIG(ADDR_SURF_P2) |
3033                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3034                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3035                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3036                                  PIPE_CONFIG(ADDR_SURF_P2) |
3037                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3038                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3039                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3040                                  PIPE_CONFIG(ADDR_SURF_P2) |
3041                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3042                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3043                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3044                                  PIPE_CONFIG(ADDR_SURF_P2) |
3045                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3046                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3047                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3048                                  PIPE_CONFIG(ADDR_SURF_P2) |
3049                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3050                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3051                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3052                                  PIPE_CONFIG(ADDR_SURF_P2) |
3053                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3054                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3055                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3056                                  PIPE_CONFIG(ADDR_SURF_P2) |
3057                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3058                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3059                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3060                                  PIPE_CONFIG(ADDR_SURF_P2) |
3061                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3062                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3063                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3064                                  PIPE_CONFIG(ADDR_SURF_P2) |
3065                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3066                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3067                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3068                                  PIPE_CONFIG(ADDR_SURF_P2) |
3069                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3070                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3071                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3072                                  PIPE_CONFIG(ADDR_SURF_P2) |
3073                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3074                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3075                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3076                                  PIPE_CONFIG(ADDR_SURF_P2) |
3077                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3078                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3079                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3080                                  PIPE_CONFIG(ADDR_SURF_P2) |
3081                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3082                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3083                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3084                                  PIPE_CONFIG(ADDR_SURF_P2) |
3085                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3086                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3087                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3088                                  PIPE_CONFIG(ADDR_SURF_P2) |
3089                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3090                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3091                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3092                                  PIPE_CONFIG(ADDR_SURF_P2) |
3093                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3094                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3095
3096                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3097                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3098                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3099                                 NUM_BANKS(ADDR_SURF_8_BANK));
3100                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3101                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3102                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3103                                 NUM_BANKS(ADDR_SURF_8_BANK));
3104                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3105                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3106                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3107                                 NUM_BANKS(ADDR_SURF_8_BANK));
3108                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3109                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3110                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3111                                 NUM_BANKS(ADDR_SURF_8_BANK));
3112                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3113                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3114                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3115                                 NUM_BANKS(ADDR_SURF_8_BANK));
3116                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3117                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3118                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3119                                 NUM_BANKS(ADDR_SURF_8_BANK));
3120                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3121                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3122                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3123                                 NUM_BANKS(ADDR_SURF_8_BANK));
3124                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3125                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3126                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3127                                 NUM_BANKS(ADDR_SURF_16_BANK));
3128                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3129                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3130                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3131                                 NUM_BANKS(ADDR_SURF_16_BANK));
3132                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3133                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3134                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3135                                  NUM_BANKS(ADDR_SURF_16_BANK));
3136                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3137                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3138                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3139                                  NUM_BANKS(ADDR_SURF_16_BANK));
3140                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3141                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3142                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3143                                  NUM_BANKS(ADDR_SURF_16_BANK));
3144                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3145                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3146                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3147                                  NUM_BANKS(ADDR_SURF_16_BANK));
3148                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3149                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3150                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3151                                  NUM_BANKS(ADDR_SURF_8_BANK));
3152
3153                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3154                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3155                             reg_offset != 23)
3156                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3157
3158                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3159                         if (reg_offset != 7)
3160                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3161
3162                 break;
3163         default:
3164                 dev_warn(adev->dev,
3165                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3166                          adev->asic_type);
3167
3168         case CHIP_CARRIZO:
3169                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3170                                 PIPE_CONFIG(ADDR_SURF_P2) |
3171                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3172                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3173                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3174                                 PIPE_CONFIG(ADDR_SURF_P2) |
3175                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3176                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3177                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3178                                 PIPE_CONFIG(ADDR_SURF_P2) |
3179                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3180                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3181                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3182                                 PIPE_CONFIG(ADDR_SURF_P2) |
3183                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3184                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3185                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3186                                 PIPE_CONFIG(ADDR_SURF_P2) |
3187                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3188                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3189                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3190                                 PIPE_CONFIG(ADDR_SURF_P2) |
3191                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3192                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3193                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3194                                 PIPE_CONFIG(ADDR_SURF_P2) |
3195                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3196                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3197                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3198                                 PIPE_CONFIG(ADDR_SURF_P2));
3199                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3200                                 PIPE_CONFIG(ADDR_SURF_P2) |
3201                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3202                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3203                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3204                                  PIPE_CONFIG(ADDR_SURF_P2) |
3205                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3206                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3207                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3208                                  PIPE_CONFIG(ADDR_SURF_P2) |
3209                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3210                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3211                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3212                                  PIPE_CONFIG(ADDR_SURF_P2) |
3213                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3214                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3215                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3216                                  PIPE_CONFIG(ADDR_SURF_P2) |
3217                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3218                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3219                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3220                                  PIPE_CONFIG(ADDR_SURF_P2) |
3221                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3222                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3223                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3224                                  PIPE_CONFIG(ADDR_SURF_P2) |
3225                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3226                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3227                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3228                                  PIPE_CONFIG(ADDR_SURF_P2) |
3229                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3230                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3231                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3232                                  PIPE_CONFIG(ADDR_SURF_P2) |
3233                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3234                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3235                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3236                                  PIPE_CONFIG(ADDR_SURF_P2) |
3237                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3238                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3239                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3240                                  PIPE_CONFIG(ADDR_SURF_P2) |
3241                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3242                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3243                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3244                                  PIPE_CONFIG(ADDR_SURF_P2) |
3245                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3246                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3247                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3248                                  PIPE_CONFIG(ADDR_SURF_P2) |
3249                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3250                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3251                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3252                                  PIPE_CONFIG(ADDR_SURF_P2) |
3253                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3254                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3255                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3256                                  PIPE_CONFIG(ADDR_SURF_P2) |
3257                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3258                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3259                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3260                                  PIPE_CONFIG(ADDR_SURF_P2) |
3261                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3262                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3263                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3264                                  PIPE_CONFIG(ADDR_SURF_P2) |
3265                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3266                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3267                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3268                                  PIPE_CONFIG(ADDR_SURF_P2) |
3269                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3270                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3271
3272                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3273                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3274                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3275                                 NUM_BANKS(ADDR_SURF_8_BANK));
3276                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3277                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3278                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3279                                 NUM_BANKS(ADDR_SURF_8_BANK));
3280                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3281                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3282                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3283                                 NUM_BANKS(ADDR_SURF_8_BANK));
3284                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3285                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3286                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3287                                 NUM_BANKS(ADDR_SURF_8_BANK));
3288                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3289                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3290                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3291                                 NUM_BANKS(ADDR_SURF_8_BANK));
3292                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3293                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3294                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3295                                 NUM_BANKS(ADDR_SURF_8_BANK));
3296                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3297                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3298                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3299                                 NUM_BANKS(ADDR_SURF_8_BANK));
3300                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3301                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3302                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3303                                 NUM_BANKS(ADDR_SURF_16_BANK));
3304                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3305                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3306                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307                                 NUM_BANKS(ADDR_SURF_16_BANK));
3308                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3309                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3310                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3311                                  NUM_BANKS(ADDR_SURF_16_BANK));
3312                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3313                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3314                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3315                                  NUM_BANKS(ADDR_SURF_16_BANK));
3316                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3318                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319                                  NUM_BANKS(ADDR_SURF_16_BANK));
3320                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3321                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3322                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3323                                  NUM_BANKS(ADDR_SURF_16_BANK));
3324                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3325                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3326                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3327                                  NUM_BANKS(ADDR_SURF_8_BANK));
3328
3329                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3330                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3331                             reg_offset != 23)
3332                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3333
3334                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3335                         if (reg_offset != 7)
3336                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3337
3338                 break;
3339         }
3340 }
3341
3342 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
3343 {
3344         u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3345
3346         if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3347                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3348                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3349         } else if (se_num == 0xffffffff) {
3350                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3351                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3352         } else if (sh_num == 0xffffffff) {
3353                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3354                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3355         } else {
3356                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3357                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3358         }
3359         WREG32(mmGRBM_GFX_INDEX, data);
3360 }
3361
3362 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3363 {
3364         return (u32)((1ULL << bit_width) - 1);
3365 }
3366
3367 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3368 {
3369         u32 data, mask;
3370
3371         data = RREG32(mmCC_RB_BACKEND_DISABLE);
3372         data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3373
3374         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
3375         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3376
3377         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3378                                        adev->gfx.config.max_sh_per_se);
3379
3380         return (~data) & mask;
3381 }
3382
3383 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3384 {
3385         int i, j;
3386         u32 data;
3387         u32 active_rbs = 0;
3388         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3389                                         adev->gfx.config.max_sh_per_se;
3390
3391         mutex_lock(&adev->grbm_idx_mutex);
3392         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3393                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3394                         gfx_v8_0_select_se_sh(adev, i, j);
3395                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3396                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3397                                                rb_bitmap_width_per_sh);
3398                 }
3399         }
3400         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3401         mutex_unlock(&adev->grbm_idx_mutex);
3402
3403         adev->gfx.config.backend_enable_mask = active_rbs;
3404         adev->gfx.config.num_rbs = hweight32(active_rbs);
3405 }
3406
3407 /**
3408  * gfx_v8_0_init_compute_vmid - gart enable
3409  *
3410  * @rdev: amdgpu_device pointer
3411  *
3412  * Initialize compute vmid sh_mem registers
3413  *
3414  */
3415 #define DEFAULT_SH_MEM_BASES    (0x6000)
3416 #define FIRST_COMPUTE_VMID      (8)
3417 #define LAST_COMPUTE_VMID       (16)
3418 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3419 {
3420         int i;
3421         uint32_t sh_mem_config;
3422         uint32_t sh_mem_bases;
3423
3424         /*
3425          * Configure apertures:
3426          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3427          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3428          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3429          */
3430         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3431
3432         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3433                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3434                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3435                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3436                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3437                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3438
3439         mutex_lock(&adev->srbm_mutex);
3440         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3441                 vi_srbm_select(adev, 0, 0, 0, i);
3442                 /* CP and shaders */
3443                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3444                 WREG32(mmSH_MEM_APE1_BASE, 1);
3445                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3446                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3447         }
3448         vi_srbm_select(adev, 0, 0, 0, 0);
3449         mutex_unlock(&adev->srbm_mutex);
3450 }
3451
3452 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3453 {
3454         u32 tmp;
3455         int i;
3456
3457         tmp = RREG32(mmGRBM_CNTL);
3458         tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3459         WREG32(mmGRBM_CNTL, tmp);
3460
3461         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3462         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3463         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3464
3465         gfx_v8_0_tiling_mode_table_init(adev);
3466
3467         gfx_v8_0_setup_rb(adev);
3468         gfx_v8_0_get_cu_info(adev);
3469
3470         /* XXX SH_MEM regs */
3471         /* where to put LDS, scratch, GPUVM in FSA64 space */
3472         mutex_lock(&adev->srbm_mutex);
3473         for (i = 0; i < 16; i++) {
3474                 vi_srbm_select(adev, 0, 0, 0, i);
3475                 /* CP and shaders */
3476                 if (i == 0) {
3477                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3478                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3479                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3480                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3481                         WREG32(mmSH_MEM_CONFIG, tmp);
3482                 } else {
3483                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3484                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3485                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3486                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3487                         WREG32(mmSH_MEM_CONFIG, tmp);
3488                 }
3489
3490                 WREG32(mmSH_MEM_APE1_BASE, 1);
3491                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3492                 WREG32(mmSH_MEM_BASES, 0);
3493         }
3494         vi_srbm_select(adev, 0, 0, 0, 0);
3495         mutex_unlock(&adev->srbm_mutex);
3496
3497         gfx_v8_0_init_compute_vmid(adev);
3498
3499         mutex_lock(&adev->grbm_idx_mutex);
3500         /*
3501          * making sure that the following register writes will be broadcasted
3502          * to all the shaders
3503          */
3504         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3505
3506         WREG32(mmPA_SC_FIFO_SIZE,
3507                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3508                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3509                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3510                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3511                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3512                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3513                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3514                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3515         mutex_unlock(&adev->grbm_idx_mutex);
3516
3517 }
3518
3519 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3520 {
3521         u32 i, j, k;
3522         u32 mask;
3523
3524         mutex_lock(&adev->grbm_idx_mutex);
3525         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3526                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3527                         gfx_v8_0_select_se_sh(adev, i, j);
3528                         for (k = 0; k < adev->usec_timeout; k++) {
3529                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3530                                         break;
3531                                 udelay(1);
3532                         }
3533                 }
3534         }
3535         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3536         mutex_unlock(&adev->grbm_idx_mutex);
3537
3538         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3539                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3540                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3541                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3542         for (k = 0; k < adev->usec_timeout; k++) {
3543                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3544                         break;
3545                 udelay(1);
3546         }
3547 }
3548
3549 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3550                                                bool enable)
3551 {
3552         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3553
3554         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3555         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3556         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3557         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3558
3559         WREG32(mmCP_INT_CNTL_RING0, tmp);
3560 }
3561
3562 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3563 {
3564         /* csib */
3565         WREG32(mmRLC_CSIB_ADDR_HI,
3566                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3567         WREG32(mmRLC_CSIB_ADDR_LO,
3568                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3569         WREG32(mmRLC_CSIB_LENGTH,
3570                         adev->gfx.rlc.clear_state_size);
3571 }
3572
/*
 * gfx_v8_0_parse_ind_reg_list - index the indirect part of the RLC
 * register list format
 *
 * @register_list_format: list to parse; modified in place
 * @ind_offset: dword offset at which parsing starts
 * @list_size: total size of @register_list_format in dwords
 * @unique_indices: out array collecting each distinct index value found
 * @indices_count: in/out number of valid entries in @unique_indices
 * @max_indices: capacity of @unique_indices
 * @ind_start_offsets: out array receiving the start offset of each entry
 * @offset_count: in/out number of valid entries in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets
 *
 * An entry starts at @ind_offset and after every 0xFFFFFFFF marker; its
 * start offset is recorded in @ind_start_offsets.  For every other dword
 * visited, the dword two positions further on is taken as an index value:
 * it is looked up in (or appended to) @unique_indices and then overwritten
 * in @register_list_format with its position in @unique_indices.
 *
 * Running out of room in either output array triggers BUG_ON().  A list
 * that ends in the middle of a record triggers a warning and stops the
 * parse instead of touching memory beyond @list_size.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		/* end-of-entry marker: the next dword starts a new entry */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/*
		 * The skip above may step past the end of a truncated list;
		 * never read or write beyond list_size.
		 */
		if (WARN_ON(ind_offset >= list_size))
			break;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* not seen before: append it to the unique list */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw value by its slot in unique_indices */
		register_list_format[ind_offset] = indices;
	}
}
3622
3623 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3624 {
3625         int i, temp, data;
3626         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3627         int indices_count = 0;
3628         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3629         int offset_count = 0;
3630
3631         int list_size;
3632         unsigned int *register_list_format =
3633                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3634         if (register_list_format == NULL)
3635                 return -ENOMEM;
3636         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3637                         adev->gfx.rlc.reg_list_format_size_bytes);
3638
3639         gfx_v8_0_parse_ind_reg_list(register_list_format,
3640                                 RLC_FormatDirectRegListLength,
3641                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3642                                 unique_indices,
3643                                 &indices_count,
3644                                 sizeof(unique_indices) / sizeof(int),
3645                                 indirect_start_offsets,
3646                                 &offset_count,
3647                                 sizeof(indirect_start_offsets)/sizeof(int));
3648
3649         /* save and restore list */
3650         temp = RREG32(mmRLC_SRM_CNTL);
3651         temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
3652         WREG32(mmRLC_SRM_CNTL, temp);
3653
3654         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3655         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3656                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3657
3658         /* indirect list */
3659         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3660         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3661                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3662
3663         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3664         list_size = list_size >> 1;
3665         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3666         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3667
3668         /* starting offsets starts */
3669         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3670                 adev->gfx.rlc.starting_offsets_start);
3671         for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3672                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3673                                 indirect_start_offsets[i]);
3674
3675         /* unique indices */
3676         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3677         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3678         for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3679                 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3680                 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3681         }
3682         kfree(register_list_format);
3683
3684         return 0;
3685 }
3686
3687 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3688 {
3689         uint32_t data;
3690
3691         data = RREG32(mmRLC_SRM_CNTL);
3692         data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3693         WREG32(mmRLC_SRM_CNTL, data);
3694 }
3695
3696 static void polaris11_init_power_gating(struct amdgpu_device *adev)
3697 {
3698         uint32_t data;
3699
3700         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3701                         AMD_PG_SUPPORT_GFX_SMG |
3702                         AMD_PG_SUPPORT_GFX_DMG)) {
3703                 data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
3704                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3705                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3706                 WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
3707
3708                 data = 0;
3709                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
3710                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
3711                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
3712                 data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
3713                 WREG32(mmRLC_PG_DELAY, data);
3714
3715                 data = RREG32(mmRLC_PG_DELAY_2);
3716                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
3717                 data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
3718                 WREG32(mmRLC_PG_DELAY_2, data);
3719
3720                 data = RREG32(mmRLC_AUTO_PG_CTRL);
3721                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3722                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3723                 WREG32(mmRLC_AUTO_PG_CTRL, data);
3724         }
3725 }
3726
3727 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3728 {
3729         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3730                               AMD_PG_SUPPORT_GFX_SMG |
3731                               AMD_PG_SUPPORT_GFX_DMG |
3732                               AMD_PG_SUPPORT_CP |
3733                               AMD_PG_SUPPORT_GDS |
3734                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
3735                 gfx_v8_0_init_csb(adev);
3736                 gfx_v8_0_init_save_restore_list(adev);
3737                 gfx_v8_0_enable_save_restore_machine(adev);
3738
3739                 if (adev->asic_type == CHIP_POLARIS11)
3740                         polaris11_init_power_gating(adev);
3741         }
3742 }
3743
3744 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3745 {
3746         u32 tmp = RREG32(mmRLC_CNTL);
3747
3748         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3749         WREG32(mmRLC_CNTL, tmp);
3750
3751         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3752
3753         gfx_v8_0_wait_for_rlc_serdes(adev);
3754 }
3755
3756 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3757 {
3758         u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3759
3760         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3761         WREG32(mmGRBM_SOFT_RESET, tmp);
3762         udelay(50);
3763         tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3764         WREG32(mmGRBM_SOFT_RESET, tmp);
3765         udelay(50);
3766 }
3767
3768 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3769 {
3770         u32 tmp = RREG32(mmRLC_CNTL);
3771
3772         tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3773         WREG32(mmRLC_CNTL, tmp);
3774
3775         /* carrizo do enable cp interrupt after cp inited */
3776         if (!(adev->flags & AMD_IS_APU))
3777                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3778
3779         udelay(50);
3780 }
3781
3782 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3783 {
3784         const struct rlc_firmware_header_v2_0 *hdr;
3785         const __le32 *fw_data;
3786         unsigned i, fw_size;
3787
3788         if (!adev->gfx.rlc_fw)
3789                 return -EINVAL;
3790
3791         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3792         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3793
3794         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3795                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3796         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3797
3798         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3799         for (i = 0; i < fw_size; i++)
3800                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3801         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3802
3803         return 0;
3804 }
3805
3806 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
3807 {
3808         int r;
3809
3810         gfx_v8_0_rlc_stop(adev);
3811
3812         /* disable CG */
3813         WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
3814         if (adev->asic_type == CHIP_POLARIS11 ||
3815                 adev->asic_type == CHIP_POLARIS10)
3816                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
3817
3818         /* disable PG */
3819         WREG32(mmRLC_PG_CNTL, 0);
3820
3821         gfx_v8_0_rlc_reset(adev);
3822
3823         gfx_v8_0_init_pg(adev);
3824
3825         if (!adev->pp_enabled) {
3826                 if (!adev->firmware.smu_load) {
3827                         /* legacy rlc firmware loading */
3828                         r = gfx_v8_0_rlc_load_microcode(adev);
3829                         if (r)
3830                                 return r;
3831                 } else {
3832                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3833                                                         AMDGPU_UCODE_ID_RLC_G);
3834                         if (r)
3835                                 return -EINVAL;
3836                 }
3837         }
3838
3839         gfx_v8_0_rlc_start(adev);
3840
3841         return 0;
3842 }
3843
3844 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3845 {
3846         int i;
3847         u32 tmp = RREG32(mmCP_ME_CNTL);
3848
3849         if (enable) {
3850                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
3851                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
3852                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
3853         } else {
3854                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
3855                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
3856                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
3857                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3858                         adev->gfx.gfx_ring[i].ready = false;
3859         }
3860         WREG32(mmCP_ME_CNTL, tmp);
3861         udelay(50);
3862 }
3863
3864 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3865 {
3866         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3867         const struct gfx_firmware_header_v1_0 *ce_hdr;
3868         const struct gfx_firmware_header_v1_0 *me_hdr;
3869         const __le32 *fw_data;
3870         unsigned i, fw_size;
3871
3872         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3873                 return -EINVAL;
3874
3875         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3876                 adev->gfx.pfp_fw->data;
3877         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3878                 adev->gfx.ce_fw->data;
3879         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3880                 adev->gfx.me_fw->data;
3881
3882         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3883         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3884         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3885
3886         gfx_v8_0_cp_gfx_enable(adev, false);
3887
3888         /* PFP */
3889         fw_data = (const __le32 *)
3890                 (adev->gfx.pfp_fw->data +
3891                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3892         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3893         WREG32(mmCP_PFP_UCODE_ADDR, 0);
3894         for (i = 0; i < fw_size; i++)
3895                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3896         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3897
3898         /* CE */
3899         fw_data = (const __le32 *)
3900                 (adev->gfx.ce_fw->data +
3901                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3902         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3903         WREG32(mmCP_CE_UCODE_ADDR, 0);
3904         for (i = 0; i < fw_size; i++)
3905                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3906         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3907
3908         /* ME */
3909         fw_data = (const __le32 *)
3910                 (adev->gfx.me_fw->data +
3911                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3912         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3913         WREG32(mmCP_ME_RAM_WADDR, 0);
3914         for (i = 0; i < fw_size; i++)
3915                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3916         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3917
3918         return 0;
3919 }
3920
3921 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3922 {
3923         u32 count = 0;
3924         const struct cs_section_def *sect = NULL;
3925         const struct cs_extent_def *ext = NULL;
3926
3927         /* begin clear state */
3928         count += 2;
3929         /* context control state */
3930         count += 3;
3931
3932         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3933                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3934                         if (sect->id == SECT_CONTEXT)
3935                                 count += 2 + ext->reg_count;
3936                         else
3937                                 return 0;
3938                 }
3939         }
3940         /* pa_sc_raster_config/pa_sc_raster_config1 */
3941         count += 4;
3942         /* end clear state */
3943         count += 2;
3944         /* clear state */
3945         count += 2;
3946
3947         return count;
3948 }
3949
3950 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
3951 {
3952         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3953         const struct cs_section_def *sect = NULL;
3954         const struct cs_extent_def *ext = NULL;
3955         int r, i;
3956
3957         /* init the CP */
3958         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3959         WREG32(mmCP_ENDIAN_SWAP, 0);
3960         WREG32(mmCP_DEVICE_ID, 1);
3961
3962         gfx_v8_0_cp_gfx_enable(adev, true);
3963
3964         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
3965         if (r) {
3966                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3967                 return r;
3968         }
3969
3970         /* clear state buffer */
3971         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3972         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3973
3974         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3975         amdgpu_ring_write(ring, 0x80000000);
3976         amdgpu_ring_write(ring, 0x80000000);
3977
3978         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3979                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3980                         if (sect->id == SECT_CONTEXT) {
3981                                 amdgpu_ring_write(ring,
3982                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3983                                                ext->reg_count));
3984                                 amdgpu_ring_write(ring,
3985                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3986                                 for (i = 0; i < ext->reg_count; i++)
3987                                         amdgpu_ring_write(ring, ext->extent[i]);
3988                         }
3989                 }
3990         }
3991
3992         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3993         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
3994         switch (adev->asic_type) {
3995         case CHIP_TONGA:
3996         case CHIP_POLARIS10:
3997                 amdgpu_ring_write(ring, 0x16000012);
3998                 amdgpu_ring_write(ring, 0x0000002A);
3999                 break;
4000         case CHIP_POLARIS11:
4001                 amdgpu_ring_write(ring, 0x16000012);
4002                 amdgpu_ring_write(ring, 0x00000000);
4003                 break;
4004         case CHIP_FIJI:
4005                 amdgpu_ring_write(ring, 0x3a00161a);
4006                 amdgpu_ring_write(ring, 0x0000002e);
4007                 break;
4008         case CHIP_CARRIZO:
4009                 amdgpu_ring_write(ring, 0x00000002);
4010                 amdgpu_ring_write(ring, 0x00000000);
4011                 break;
4012         case CHIP_TOPAZ:
4013                 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4014                                 0x00000000 : 0x00000002);
4015                 amdgpu_ring_write(ring, 0x00000000);
4016                 break;
4017         case CHIP_STONEY:
4018                 amdgpu_ring_write(ring, 0x00000000);
4019                 amdgpu_ring_write(ring, 0x00000000);
4020                 break;
4021         default:
4022                 BUG();
4023         }
4024
4025         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4026         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4027
4028         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4029         amdgpu_ring_write(ring, 0);
4030
4031         /* init the CE partitions */
4032         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4033         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4034         amdgpu_ring_write(ring, 0x8000);
4035         amdgpu_ring_write(ring, 0x8000);
4036
4037         amdgpu_ring_commit(ring);
4038
4039         return 0;
4040 }
4041
4042 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4043 {
4044         struct amdgpu_ring *ring;
4045         u32 tmp;
4046         u32 rb_bufsz;
4047         u64 rb_addr, rptr_addr;
4048         int r;
4049
4050         /* Set the write pointer delay */
4051         WREG32(mmCP_RB_WPTR_DELAY, 0);
4052
4053         /* set the RB to use vmid 0 */
4054         WREG32(mmCP_RB_VMID, 0);
4055
4056         /* Set ring buffer size */
4057         ring = &adev->gfx.gfx_ring[0];
4058         rb_bufsz = order_base_2(ring->ring_size / 8);
4059         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4060         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4061         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4062         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4063 #ifdef __BIG_ENDIAN
4064         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4065 #endif
4066         WREG32(mmCP_RB0_CNTL, tmp);
4067
4068         /* Initialize the ring buffer's read and write pointers */
4069         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4070         ring->wptr = 0;
4071         WREG32(mmCP_RB0_WPTR, ring->wptr);
4072
4073         /* set the wb address wether it's enabled or not */
4074         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4075         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4076         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4077
4078         mdelay(1);
4079         WREG32(mmCP_RB0_CNTL, tmp);
4080
4081         rb_addr = ring->gpu_addr >> 8;
4082         WREG32(mmCP_RB0_BASE, rb_addr);
4083         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4084
4085         /* no gfx doorbells on iceland */
4086         if (adev->asic_type != CHIP_TOPAZ) {
4087                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4088                 if (ring->use_doorbell) {
4089                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4090                                             DOORBELL_OFFSET, ring->doorbell_index);
4091                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4092                                             DOORBELL_HIT, 0);
4093                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4094                                             DOORBELL_EN, 1);
4095                 } else {
4096                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4097                                             DOORBELL_EN, 0);
4098                 }
4099                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4100
4101                 if (adev->asic_type == CHIP_TONGA) {
4102                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4103                                             DOORBELL_RANGE_LOWER,
4104                                             AMDGPU_DOORBELL_GFX_RING0);
4105                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4106
4107                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4108                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4109                 }
4110
4111         }
4112
4113         /* start the ring */
4114         gfx_v8_0_cp_gfx_start(adev);
4115         ring->ready = true;
4116         r = amdgpu_ring_test_ring(ring);
4117         if (r) {
4118                 ring->ready = false;
4119                 return r;
4120         }
4121
4122         return 0;
4123 }
4124
4125 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4126 {
4127         int i;
4128
4129         if (enable) {
4130                 WREG32(mmCP_MEC_CNTL, 0);
4131         } else {
4132                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4133                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4134                         adev->gfx.compute_ring[i].ready = false;
4135         }
4136         udelay(50);
4137 }
4138
4139 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4140 {
4141         const struct gfx_firmware_header_v1_0 *mec_hdr;
4142         const __le32 *fw_data;
4143         unsigned i, fw_size;
4144
4145         if (!adev->gfx.mec_fw)
4146                 return -EINVAL;
4147
4148         gfx_v8_0_cp_compute_enable(adev, false);
4149
4150         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4151         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4152
4153         fw_data = (const __le32 *)
4154                 (adev->gfx.mec_fw->data +
4155                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4156         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4157
4158         /* MEC1 */
4159         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4160         for (i = 0; i < fw_size; i++)
4161                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4162         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4163
4164         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4165         if (adev->gfx.mec2_fw) {
4166                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4167
4168                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4169                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4170
4171                 fw_data = (const __le32 *)
4172                         (adev->gfx.mec2_fw->data +
4173                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4174                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4175
4176                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4177                 for (i = 0; i < fw_size; i++)
4178                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4179                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4180         }
4181
4182         return 0;
4183 }
4184
/*
 * struct vi_mqd - in-memory image of a compute queue's MQD.
 *
 * gfx_v8_0_cp_compute_resume() allocates one buffer object of
 * sizeof(struct vi_mqd) per compute ring, zeroes it, fills in the fields
 * it programs and hands the buffer's GPU address to CP_MQD_BASE_ADDR.
 *
 * The structure consists of 256 32-bit words, each annotated with its
 * ordinal (dword index), followed by a further 256-dword area reserved
 * for the microcode. The cp_hqd_* and cp_mqd_* members are written with
 * the same values as the identically named mmCP_HQD_* / mmCP_MQD_*
 * registers during queue setup.
 *
 * NOTE(review): the ordinals presumably mirror a layout fixed by the CP
 * microcode, so members must not be reordered, resized or removed -
 * confirm against the hardware documentation before touching this.
 */
4185 struct vi_mqd {
4186         uint32_t header;  /* ordinal0 */
4187         uint32_t compute_dispatch_initiator;  /* ordinal1 */
4188         uint32_t compute_dim_x;  /* ordinal2 */
4189         uint32_t compute_dim_y;  /* ordinal3 */
4190         uint32_t compute_dim_z;  /* ordinal4 */
4191         uint32_t compute_start_x;  /* ordinal5 */
4192         uint32_t compute_start_y;  /* ordinal6 */
4193         uint32_t compute_start_z;  /* ordinal7 */
4194         uint32_t compute_num_thread_x;  /* ordinal8 */
4195         uint32_t compute_num_thread_y;  /* ordinal9 */
4196         uint32_t compute_num_thread_z;  /* ordinal10 */
4197         uint32_t compute_pipelinestat_enable;  /* ordinal11 */
4198         uint32_t compute_perfcount_enable;  /* ordinal12 */
4199         uint32_t compute_pgm_lo;  /* ordinal13 */
4200         uint32_t compute_pgm_hi;  /* ordinal14 */
4201         uint32_t compute_tba_lo;  /* ordinal15 */
4202         uint32_t compute_tba_hi;  /* ordinal16 */
4203         uint32_t compute_tma_lo;  /* ordinal17 */
4204         uint32_t compute_tma_hi;  /* ordinal18 */
4205         uint32_t compute_pgm_rsrc1;  /* ordinal19 */
4206         uint32_t compute_pgm_rsrc2;  /* ordinal20 */
4207         uint32_t compute_vmid;  /* ordinal21 */
4208         uint32_t compute_resource_limits;  /* ordinal22 */
4209         uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
4210         uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
4211         uint32_t compute_tmpring_size;  /* ordinal25 */
4212         uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
4213         uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
4214         uint32_t compute_restart_x;  /* ordinal28 */
4215         uint32_t compute_restart_y;  /* ordinal29 */
4216         uint32_t compute_restart_z;  /* ordinal30 */
4217         uint32_t compute_thread_trace_enable;  /* ordinal31 */
4218         uint32_t compute_misc_reserved;  /* ordinal32 */
4219         uint32_t compute_dispatch_id;  /* ordinal33 */
4220         uint32_t compute_threadgroup_id;  /* ordinal34 */
4221         uint32_t compute_relaunch;  /* ordinal35 */
4222         uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
4223         uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
4224         uint32_t compute_wave_restore_control;  /* ordinal38 */
4225         uint32_t reserved9;  /* ordinal39 */
4226         uint32_t reserved10;  /* ordinal40 */
4227         uint32_t reserved11;  /* ordinal41 */
4228         uint32_t reserved12;  /* ordinal42 */
4229         uint32_t reserved13;  /* ordinal43 */
4230         uint32_t reserved14;  /* ordinal44 */
4231         uint32_t reserved15;  /* ordinal45 */
4232         uint32_t reserved16;  /* ordinal46 */
4233         uint32_t reserved17;  /* ordinal47 */
4234         uint32_t reserved18;  /* ordinal48 */
4235         uint32_t reserved19;  /* ordinal49 */
4236         uint32_t reserved20;  /* ordinal50 */
4237         uint32_t reserved21;  /* ordinal51 */
4238         uint32_t reserved22;  /* ordinal52 */
4239         uint32_t reserved23;  /* ordinal53 */
4240         uint32_t reserved24;  /* ordinal54 */
4241         uint32_t reserved25;  /* ordinal55 */
4242         uint32_t reserved26;  /* ordinal56 */
4243         uint32_t reserved27;  /* ordinal57 */
4244         uint32_t reserved28;  /* ordinal58 */
4245         uint32_t reserved29;  /* ordinal59 */
4246         uint32_t reserved30;  /* ordinal60 */
4247         uint32_t reserved31;  /* ordinal61 */
4248         uint32_t reserved32;  /* ordinal62 */
4249         uint32_t reserved33;  /* ordinal63 */
4250         uint32_t reserved34;  /* ordinal64 */
4251         uint32_t compute_user_data_0;  /* ordinal65 */
4252         uint32_t compute_user_data_1;  /* ordinal66 */
4253         uint32_t compute_user_data_2;  /* ordinal67 */
4254         uint32_t compute_user_data_3;  /* ordinal68 */
4255         uint32_t compute_user_data_4;  /* ordinal69 */
4256         uint32_t compute_user_data_5;  /* ordinal70 */
4257         uint32_t compute_user_data_6;  /* ordinal71 */
4258         uint32_t compute_user_data_7;  /* ordinal72 */
4259         uint32_t compute_user_data_8;  /* ordinal73 */
4260         uint32_t compute_user_data_9;  /* ordinal74 */
4261         uint32_t compute_user_data_10;  /* ordinal75 */
4262         uint32_t compute_user_data_11;  /* ordinal76 */
4263         uint32_t compute_user_data_12;  /* ordinal77 */
4264         uint32_t compute_user_data_13;  /* ordinal78 */
4265         uint32_t compute_user_data_14;  /* ordinal79 */
4266         uint32_t compute_user_data_15;  /* ordinal80 */
4267         uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
4268         uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
4269         uint32_t reserved35;  /* ordinal83 */
4270         uint32_t reserved36;  /* ordinal84 */
4271         uint32_t reserved37;  /* ordinal85 */
4272         uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
4273         uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
4274         uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
4275         uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
4276         uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
4277         uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
4278         uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
4279         uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
4280         uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
4281         uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
4282         uint32_t reserved38;  /* ordinal96 */
4283         uint32_t reserved39;  /* ordinal97 */
4284         uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
4285         uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
4286         uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
4287         uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
4288         uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
4289         uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
4290         uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
4291         uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
4292         uint32_t reserved40;  /* ordinal106 */
4293         uint32_t reserved41;  /* ordinal107 */
4294         uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
4295         uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
4296         uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
4297         uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
4298         uint32_t reserved42;  /* ordinal112 */
4299         uint32_t reserved43;  /* ordinal113 */
4300         uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
4301         uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
4302         uint32_t cp_packet_id_lo;  /* ordinal116 */
4303         uint32_t cp_packet_id_hi;  /* ordinal117 */
4304         uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
4305         uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
4306         uint32_t gds_save_base_addr_lo;  /* ordinal120 */
4307         uint32_t gds_save_base_addr_hi;  /* ordinal121 */
4308         uint32_t gds_save_mask_lo;  /* ordinal122 */
4309         uint32_t gds_save_mask_hi;  /* ordinal123 */
4310         uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
4311         uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
4312         uint32_t reserved44;  /* ordinal126 */
4313         uint32_t reserved45;  /* ordinal127 */
4314         uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
4315         uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
4316         uint32_t cp_hqd_active;  /* ordinal130 */
4317         uint32_t cp_hqd_vmid;  /* ordinal131 */
4318         uint32_t cp_hqd_persistent_state;  /* ordinal132 */
4319         uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
4320         uint32_t cp_hqd_queue_priority;  /* ordinal134 */
4321         uint32_t cp_hqd_quantum;  /* ordinal135 */
4322         uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
4323         uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
4324         uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
4325         uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
4326         uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
4327         uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
4328         uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
4329         uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
4330         uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
4331         uint32_t cp_hqd_pq_control;  /* ordinal145 */
4332         uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
4333         uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
4334         uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
4335         uint32_t cp_hqd_ib_control;  /* ordinal149 */
4336         uint32_t cp_hqd_iq_timer;  /* ordinal150 */
4337         uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
4338         uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
4339         uint32_t cp_hqd_dma_offload;  /* ordinal153 */
4340         uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
4341         uint32_t cp_hqd_msg_type;  /* ordinal155 */
4342         uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
4343         uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
4344         uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
4345         uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
4346         uint32_t cp_hqd_hq_status0;  /* ordinal160 */
4347         uint32_t cp_hqd_hq_control0;  /* ordinal161 */
4348         uint32_t cp_mqd_control;  /* ordinal162 */
4349         uint32_t cp_hqd_hq_status1;  /* ordinal163 */
4350         uint32_t cp_hqd_hq_control1;  /* ordinal164 */
4351         uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
4352         uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
4353         uint32_t cp_hqd_eop_control;  /* ordinal167 */
4354         uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
4355         uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
4356         uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
4357         uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
4358         uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
4359         uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
4360         uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
4361         uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
4362         uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
4363         uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
4364         uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
4365         uint32_t cp_hqd_error;  /* ordinal179 */
4366         uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
4367         uint32_t cp_hqd_eop_dones;  /* ordinal181 */
4368         uint32_t reserved46;  /* ordinal182 */
4369         uint32_t reserved47;  /* ordinal183 */
4370         uint32_t reserved48;  /* ordinal184 */
4371         uint32_t reserved49;  /* ordinal185 */
4372         uint32_t reserved50;  /* ordinal186 */
4373         uint32_t reserved51;  /* ordinal187 */
4374         uint32_t reserved52;  /* ordinal188 */
4375         uint32_t reserved53;  /* ordinal189 */
4376         uint32_t reserved54;  /* ordinal190 */
4377         uint32_t reserved55;  /* ordinal191 */
4378         uint32_t iqtimer_pkt_header;  /* ordinal192 */
4379         uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
4380         uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
4381         uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
4382         uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
4383         uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
4384         uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
4385         uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
4386         uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
4387         uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
4388         uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
4389         uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
4390         uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
4391         uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
4392         uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
4393         uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
4394         uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
4395         uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
4396         uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
4397         uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
4398         uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
4399         uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
4400         uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
4401         uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
4402         uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
4403         uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
4404         uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
4405         uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
4406         uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
4407         uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
4408         uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
4409         uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
4410         uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
4411         uint32_t reserved56;  /* ordinal225 */
4412         uint32_t reserved57;  /* ordinal226 */
4413         uint32_t reserved58;  /* ordinal227 */
4414         uint32_t set_resources_header;  /* ordinal228 */
4415         uint32_t set_resources_dw1;  /* ordinal229 */
4416         uint32_t set_resources_dw2;  /* ordinal230 */
4417         uint32_t set_resources_dw3;  /* ordinal231 */
4418         uint32_t set_resources_dw4;  /* ordinal232 */
4419         uint32_t set_resources_dw5;  /* ordinal233 */
4420         uint32_t set_resources_dw6;  /* ordinal234 */
4421         uint32_t set_resources_dw7;  /* ordinal235 */
4422         uint32_t reserved59;  /* ordinal236 */
4423         uint32_t reserved60;  /* ordinal237 */
4424         uint32_t reserved61;  /* ordinal238 */
4425         uint32_t reserved62;  /* ordinal239 */
4426         uint32_t reserved63;  /* ordinal240 */
4427         uint32_t reserved64;  /* ordinal241 */
4428         uint32_t reserved65;  /* ordinal242 */
4429         uint32_t reserved66;  /* ordinal243 */
4430         uint32_t reserved67;  /* ordinal244 */
4431         uint32_t reserved68;  /* ordinal245 */
4432         uint32_t reserved69;  /* ordinal246 */
4433         uint32_t reserved70;  /* ordinal247 */
4434         uint32_t reserved71;  /* ordinal248 */
4435         uint32_t reserved72;  /* ordinal249 */
4436         uint32_t reserved73;  /* ordinal250 */
4437         uint32_t reserved74;  /* ordinal251 */
4438         uint32_t reserved75;  /* ordinal252 */
4439         uint32_t reserved76;  /* ordinal253 */
4440         uint32_t reserved77;  /* ordinal254 */
4441         uint32_t reserved78;  /* ordinal255 */
4442
4443         uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
4444 };
4445
4446 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4447 {
4448         int i, r;
4449
4450         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4451                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4452
4453                 if (ring->mqd_obj) {
4454                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4455                         if (unlikely(r != 0))
4456                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4457
4458                         amdgpu_bo_unpin(ring->mqd_obj);
4459                         amdgpu_bo_unreserve(ring->mqd_obj);
4460
4461                         amdgpu_bo_unref(&ring->mqd_obj);
4462                         ring->mqd_obj = NULL;
4463                 }
4464         }
4465 }
4466
4467 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4468 {
4469         int r, i, j;
4470         u32 tmp;
4471         bool use_doorbell = true;
4472         u64 hqd_gpu_addr;
4473         u64 mqd_gpu_addr;
4474         u64 eop_gpu_addr;
4475         u64 wb_gpu_addr;
4476         u32 *buf;
4477         struct vi_mqd *mqd;
4478
4479         /* init the pipes */
4480         mutex_lock(&adev->srbm_mutex);
4481         for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4482                 int me = (i < 4) ? 1 : 2;
4483                 int pipe = (i < 4) ? i : (i - 4);
4484
4485                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4486                 eop_gpu_addr >>= 8;
4487
4488                 vi_srbm_select(adev, me, pipe, 0, 0);
4489
4490                 /* write the EOP addr */
4491                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4492                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4493
4494                 /* set the VMID assigned */
4495                 WREG32(mmCP_HQD_VMID, 0);
4496
4497                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4498                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4499                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4500                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
4501                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4502         }
4503         vi_srbm_select(adev, 0, 0, 0, 0);
4504         mutex_unlock(&adev->srbm_mutex);
4505
4506         /* init the queues.  Just two for now. */
4507         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4508                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4509
4510                 if (ring->mqd_obj == NULL) {
4511                         r = amdgpu_bo_create(adev,
4512                                              sizeof(struct vi_mqd),
4513                                              PAGE_SIZE, true,
4514                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4515                                              NULL, &ring->mqd_obj);
4516                         if (r) {
4517                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4518                                 return r;
4519                         }
4520                 }
4521
4522                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4523                 if (unlikely(r != 0)) {
4524                         gfx_v8_0_cp_compute_fini(adev);
4525                         return r;
4526                 }
4527                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4528                                   &mqd_gpu_addr);
4529                 if (r) {
4530                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4531                         gfx_v8_0_cp_compute_fini(adev);
4532                         return r;
4533                 }
4534                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4535                 if (r) {
4536                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4537                         gfx_v8_0_cp_compute_fini(adev);
4538                         return r;
4539                 }
4540
4541                 /* init the mqd struct */
4542                 memset(buf, 0, sizeof(struct vi_mqd));
4543
4544                 mqd = (struct vi_mqd *)buf;
4545                 mqd->header = 0xC0310800;
4546                 mqd->compute_pipelinestat_enable = 0x00000001;
4547                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4548                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4549                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4550                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4551                 mqd->compute_misc_reserved = 0x00000003;
4552
4553                 mutex_lock(&adev->srbm_mutex);
4554                 vi_srbm_select(adev, ring->me,
4555                                ring->pipe,
4556                                ring->queue, 0);
4557
4558                 /* disable wptr polling */
4559                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4560                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4561                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4562
4563                 mqd->cp_hqd_eop_base_addr_lo =
4564                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
4565                 mqd->cp_hqd_eop_base_addr_hi =
4566                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4567
4568                 /* enable doorbell? */
4569                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4570                 if (use_doorbell) {
4571                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4572                 } else {
4573                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4574                 }
4575                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4576                 mqd->cp_hqd_pq_doorbell_control = tmp;
4577
4578                 /* disable the queue if it's active */
4579                 mqd->cp_hqd_dequeue_request = 0;
4580                 mqd->cp_hqd_pq_rptr = 0;
4581                 mqd->cp_hqd_pq_wptr= 0;
4582                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4583                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4584                         for (j = 0; j < adev->usec_timeout; j++) {
4585                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4586                                         break;
4587                                 udelay(1);
4588                         }
4589                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4590                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4591                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4592                 }
4593
4594                 /* set the pointer to the MQD */
4595                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4596                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4597                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4598                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4599
4600                 /* set MQD vmid to 0 */
4601                 tmp = RREG32(mmCP_MQD_CONTROL);
4602                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4603                 WREG32(mmCP_MQD_CONTROL, tmp);
4604                 mqd->cp_mqd_control = tmp;
4605
4606                 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4607                 hqd_gpu_addr = ring->gpu_addr >> 8;
4608                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4609                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4610                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4611                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4612
4613                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4614                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4615                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4616                                     (order_base_2(ring->ring_size / 4) - 1));
4617                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4618                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4619 #ifdef __BIG_ENDIAN
4620                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4621 #endif
4622                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4623                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4624                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4625                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4626                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4627                 mqd->cp_hqd_pq_control = tmp;
4628
4629                 /* set the wb address wether it's enabled or not */
4630                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4631                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4632                 mqd->cp_hqd_pq_rptr_report_addr_hi =
4633                         upper_32_bits(wb_gpu_addr) & 0xffff;
4634                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4635                        mqd->cp_hqd_pq_rptr_report_addr_lo);
4636                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4637                        mqd->cp_hqd_pq_rptr_report_addr_hi);
4638
4639                 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4640                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4641                 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4642                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4643                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4644                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4645                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
4646
4647                 /* enable the doorbell if requested */
4648                 if (use_doorbell) {
4649                         if ((adev->asic_type == CHIP_CARRIZO) ||
4650                             (adev->asic_type == CHIP_FIJI) ||
4651                             (adev->asic_type == CHIP_STONEY) ||
4652                             (adev->asic_type == CHIP_POLARIS11) ||
4653                             (adev->asic_type == CHIP_POLARIS10)) {
4654                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4655                                        AMDGPU_DOORBELL_KIQ << 2);
4656                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4657                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
4658                         }
4659                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4660                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4661                                             DOORBELL_OFFSET, ring->doorbell_index);
4662                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4663                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4664                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4665                         mqd->cp_hqd_pq_doorbell_control = tmp;
4666
4667                 } else {
4668                         mqd->cp_hqd_pq_doorbell_control = 0;
4669                 }
4670                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4671                        mqd->cp_hqd_pq_doorbell_control);
4672
4673                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4674                 ring->wptr = 0;
4675                 mqd->cp_hqd_pq_wptr = ring->wptr;
4676                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4677                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4678
4679                 /* set the vmid for the queue */
4680                 mqd->cp_hqd_vmid = 0;
4681                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4682
4683                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4684                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4685                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4686                 mqd->cp_hqd_persistent_state = tmp;
4687                 if (adev->asic_type == CHIP_STONEY ||
4688                         adev->asic_type == CHIP_POLARIS11 ||
4689                         adev->asic_type == CHIP_POLARIS10) {
4690                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4691                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4692                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4693                 }
4694
4695                 /* activate the queue */
4696                 mqd->cp_hqd_active = 1;
4697                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4698
4699                 vi_srbm_select(adev, 0, 0, 0, 0);
4700                 mutex_unlock(&adev->srbm_mutex);
4701
4702                 amdgpu_bo_kunmap(ring->mqd_obj);
4703                 amdgpu_bo_unreserve(ring->mqd_obj);
4704         }
4705
4706         if (use_doorbell) {
4707                 tmp = RREG32(mmCP_PQ_STATUS);
4708                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4709                 WREG32(mmCP_PQ_STATUS, tmp);
4710         }
4711
4712         gfx_v8_0_cp_compute_enable(adev, true);
4713
4714         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4715                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4716
4717                 ring->ready = true;
4718                 r = amdgpu_ring_test_ring(ring);
4719                 if (r)
4720                         ring->ready = false;
4721         }
4722
4723         return 0;
4724 }
4725
4726 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4727 {
4728         int r;
4729
4730         if (!(adev->flags & AMD_IS_APU))
4731                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4732
4733         if (!adev->pp_enabled) {
4734                 if (!adev->firmware.smu_load) {
4735                         /* legacy firmware loading */
4736                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
4737                         if (r)
4738                                 return r;
4739
4740                         r = gfx_v8_0_cp_compute_load_microcode(adev);
4741                         if (r)
4742                                 return r;
4743                 } else {
4744                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4745                                                         AMDGPU_UCODE_ID_CP_CE);
4746                         if (r)
4747                                 return -EINVAL;
4748
4749                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4750                                                         AMDGPU_UCODE_ID_CP_PFP);
4751                         if (r)
4752                                 return -EINVAL;
4753
4754                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4755                                                         AMDGPU_UCODE_ID_CP_ME);
4756                         if (r)
4757                                 return -EINVAL;
4758
4759                         if (adev->asic_type == CHIP_TOPAZ) {
4760                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
4761                                 if (r)
4762                                         return r;
4763                         } else {
4764                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4765                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
4766                                 if (r)
4767                                         return -EINVAL;
4768                         }
4769                 }
4770         }
4771
4772         r = gfx_v8_0_cp_gfx_resume(adev);
4773         if (r)
4774                 return r;
4775
4776         r = gfx_v8_0_cp_compute_resume(adev);
4777         if (r)
4778                 return r;
4779
4780         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4781
4782         return 0;
4783 }
4784
4785 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4786 {
4787         gfx_v8_0_cp_gfx_enable(adev, enable);
4788         gfx_v8_0_cp_compute_enable(adev, enable);
4789 }
4790
/*
 * Hardware init for the gfx block: program the golden registers, run the
 * GPU init, then resume the RLC followed by the CP.
 *
 * @handle: the amdgpu_device pointer, passed as an opaque handle.
 *
 * Returns 0 on success or the error from the RLC/CP resume step that failed.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int ret;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	ret = gfx_v8_0_rlc_resume(adev);
	if (ret)
		return ret;

	return gfx_v8_0_cp_resume(adev);
}
4810
4811 static int gfx_v8_0_hw_fini(void *handle)
4812 {
4813         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4814
4815         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4816         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4817         gfx_v8_0_cp_enable(adev, false);
4818         gfx_v8_0_rlc_stop(adev);
4819         gfx_v8_0_cp_compute_fini(adev);
4820
4821         amdgpu_set_powergating_state(adev,
4822                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
4823
4824         return 0;
4825 }
4826
/* Suspend is implemented as a plain hardware teardown of the gfx block. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4833
/* Resume is implemented as a plain hardware init of the gfx block. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4840
4841 static bool gfx_v8_0_is_idle(void *handle)
4842 {
4843         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4844
4845         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
4846                 return false;
4847         else
4848                 return true;
4849 }
4850
4851 static int gfx_v8_0_wait_for_idle(void *handle)
4852 {
4853         unsigned i;
4854         u32 tmp;
4855         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4856
4857         for (i = 0; i < adev->usec_timeout; i++) {
4858                 /* read MC_STATUS */
4859                 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
4860
4861                 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
4862                         return 0;
4863                 udelay(1);
4864         }
4865         return -ETIMEDOUT;
4866 }
4867
4868 static int gfx_v8_0_soft_reset(void *handle)
4869 {
4870         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4871         u32 tmp;
4872         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4873
4874         /* GRBM_STATUS */
4875         tmp = RREG32(mmGRBM_STATUS);
4876         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4877                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4878                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4879                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4880                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4881                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4882                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4883                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4884                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4885                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4886         }
4887
4888         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4889                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4890                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4891                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4892                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4893         }
4894
4895         /* GRBM_STATUS2 */
4896         tmp = RREG32(mmGRBM_STATUS2);
4897         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4898                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4899                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4900
4901         /* SRBM_STATUS */
4902         tmp = RREG32(mmSRBM_STATUS);
4903         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4904                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4905                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4906
4907         if (grbm_soft_reset || srbm_soft_reset) {
4908                 /* stop the rlc */
4909                 gfx_v8_0_rlc_stop(adev);
4910
4911                 /* Disable GFX parsing/prefetching */
4912                 gfx_v8_0_cp_gfx_enable(adev, false);
4913
4914                 /* Disable MEC parsing/prefetching */
4915                 gfx_v8_0_cp_compute_enable(adev, false);
4916
4917                 if (grbm_soft_reset || srbm_soft_reset) {
4918                         tmp = RREG32(mmGMCON_DEBUG);
4919                         tmp = REG_SET_FIELD(tmp,
4920                                             GMCON_DEBUG, GFX_STALL, 1);
4921                         tmp = REG_SET_FIELD(tmp,
4922                                             GMCON_DEBUG, GFX_CLEAR, 1);
4923                         WREG32(mmGMCON_DEBUG, tmp);
4924
4925                         udelay(50);
4926                 }
4927
4928                 if (grbm_soft_reset) {
4929                         tmp = RREG32(mmGRBM_SOFT_RESET);
4930                         tmp |= grbm_soft_reset;
4931                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4932                         WREG32(mmGRBM_SOFT_RESET, tmp);
4933                         tmp = RREG32(mmGRBM_SOFT_RESET);
4934
4935                         udelay(50);
4936
4937                         tmp &= ~grbm_soft_reset;
4938                         WREG32(mmGRBM_SOFT_RESET, tmp);
4939                         tmp = RREG32(mmGRBM_SOFT_RESET);
4940                 }
4941
4942                 if (srbm_soft_reset) {
4943                         tmp = RREG32(mmSRBM_SOFT_RESET);
4944                         tmp |= srbm_soft_reset;
4945                         dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4946                         WREG32(mmSRBM_SOFT_RESET, tmp);
4947                         tmp = RREG32(mmSRBM_SOFT_RESET);
4948
4949                         udelay(50);
4950
4951                         tmp &= ~srbm_soft_reset;
4952                         WREG32(mmSRBM_SOFT_RESET, tmp);
4953                         tmp = RREG32(mmSRBM_SOFT_RESET);
4954                 }
4955
4956                 if (grbm_soft_reset || srbm_soft_reset) {
4957                         tmp = RREG32(mmGMCON_DEBUG);
4958                         tmp = REG_SET_FIELD(tmp,
4959                                             GMCON_DEBUG, GFX_STALL, 0);
4960                         tmp = REG_SET_FIELD(tmp,
4961                                             GMCON_DEBUG, GFX_CLEAR, 0);
4962                         WREG32(mmGMCON_DEBUG, tmp);
4963                 }
4964
4965                 /* Wait a little for things to settle down */
4966                 udelay(50);
4967         }
4968         return 0;
4969 }
4970
4971 /**
4972  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4973  *
4974  * @adev: amdgpu_device pointer
4975  *
4976  * Fetches a GPU clock counter snapshot.
4977  * Returns the 64 bit clock counter snapshot.
4978  */
4979 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4980 {
4981         uint64_t clock;
4982
4983         mutex_lock(&adev->gfx.gpu_clock_mutex);
4984         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4985         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4986                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4987         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4988         return clock;
4989 }
4990
4991 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4992                                           uint32_t vmid,
4993                                           uint32_t gds_base, uint32_t gds_size,
4994                                           uint32_t gws_base, uint32_t gws_size,
4995                                           uint32_t oa_base, uint32_t oa_size)
4996 {
4997         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4998         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4999
5000         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5001         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5002
5003         oa_base = oa_base >> AMDGPU_OA_SHIFT;
5004         oa_size = oa_size >> AMDGPU_OA_SHIFT;
5005
5006         /* GDS Base */
5007         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5008         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5009                                 WRITE_DATA_DST_SEL(0)));
5010         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5011         amdgpu_ring_write(ring, 0);
5012         amdgpu_ring_write(ring, gds_base);
5013
5014         /* GDS Size */
5015         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5016         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5017                                 WRITE_DATA_DST_SEL(0)));
5018         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5019         amdgpu_ring_write(ring, 0);
5020         amdgpu_ring_write(ring, gds_size);
5021
5022         /* GWS */
5023         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5024         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5025                                 WRITE_DATA_DST_SEL(0)));
5026         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5027         amdgpu_ring_write(ring, 0);
5028         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5029
5030         /* OA */
5031         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5032         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5033                                 WRITE_DATA_DST_SEL(0)));
5034         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5035         amdgpu_ring_write(ring, 0);
5036         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5037 }
5038
5039 static int gfx_v8_0_early_init(void *handle)
5040 {
5041         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5042
5043         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5044         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5045         gfx_v8_0_set_ring_funcs(adev);
5046         gfx_v8_0_set_irq_funcs(adev);
5047         gfx_v8_0_set_gds_init(adev);
5048         gfx_v8_0_set_rlc_funcs(adev);
5049
5050         return 0;
5051 }
5052
5053 static int gfx_v8_0_late_init(void *handle)
5054 {
5055         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5056         int r;
5057
5058         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5059         if (r)
5060                 return r;
5061
5062         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5063         if (r)
5064                 return r;
5065
5066         /* requires IBs so do in late init after IB pool is initialized */
5067         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5068         if (r)
5069                 return r;
5070
5071         amdgpu_set_powergating_state(adev,
5072                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5073
5074         return 0;
5075 }
5076
5077 static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5078                 bool enable)
5079 {
5080         uint32_t data, temp;
5081
5082         /* Send msg to SMU via Powerplay */
5083         amdgpu_set_powergating_state(adev,
5084                         AMD_IP_BLOCK_TYPE_SMC,
5085                         enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5086
5087         if (enable) {
5088                 /* Enable static MGPG */
5089                 temp = data = RREG32(mmRLC_PG_CNTL);
5090                 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5091
5092                 if (temp != data)
5093                         WREG32(mmRLC_PG_CNTL, data);
5094         } else {
5095                 temp = data = RREG32(mmRLC_PG_CNTL);
5096                 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5097
5098                 if (temp != data)
5099                         WREG32(mmRLC_PG_CNTL, data);
5100         }
5101 }
5102
5103 static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5104                 bool enable)
5105 {
5106         uint32_t data, temp;
5107
5108         if (enable) {
5109                 /* Enable dynamic MGPG */
5110                 temp = data = RREG32(mmRLC_PG_CNTL);
5111                 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5112
5113                 if (temp != data)
5114                         WREG32(mmRLC_PG_CNTL, data);
5115         } else {
5116                 temp = data = RREG32(mmRLC_PG_CNTL);
5117                 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5118
5119                 if (temp != data)
5120                         WREG32(mmRLC_PG_CNTL, data);
5121         }
5122 }
5123
5124 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5125                 bool enable)
5126 {
5127         uint32_t data, temp;
5128
5129         if (enable) {
5130                 /* Enable quick PG */
5131                 temp = data = RREG32(mmRLC_PG_CNTL);
5132                 data |= 0x100000;
5133
5134                 if (temp != data)
5135                         WREG32(mmRLC_PG_CNTL, data);
5136         } else {
5137                 temp = data = RREG32(mmRLC_PG_CNTL);
5138                 data &= ~0x100000;
5139
5140                 if (temp != data)
5141                         WREG32(mmRLC_PG_CNTL, data);
5142         }
5143 }
5144
5145 static int gfx_v8_0_set_powergating_state(void *handle,
5146                                           enum amd_powergating_state state)
5147 {
5148         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5149
5150         if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5151                 return 0;
5152
5153         switch (adev->asic_type) {
5154         case CHIP_POLARIS11:
5155                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG)
5156                         polaris11_enable_gfx_static_mg_power_gating(adev,
5157                                         state == AMD_PG_STATE_GATE ? true : false);
5158                 else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG)
5159                         polaris11_enable_gfx_dynamic_mg_power_gating(adev,
5160                                         state == AMD_PG_STATE_GATE ? true : false);
5161                 else
5162                         polaris11_enable_gfx_quick_mg_power_gating(adev,
5163                                         state == AMD_PG_STATE_GATE ? true : false);
5164                 break;
5165         default:
5166                 break;
5167         }
5168
5169         return 0;
5170 }
5171
5172 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5173                                      uint32_t reg_addr, uint32_t cmd)
5174 {
5175         uint32_t data;
5176
5177         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5178
5179         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5180         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5181
5182         data = RREG32(mmRLC_SERDES_WR_CTRL);
5183         if (adev->asic_type == CHIP_STONEY)
5184                         data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5185                         RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5186                         RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5187                         RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5188                         RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5189                         RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5190                         RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5191                         RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5192                         RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5193         else
5194                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5195                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5196                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5197                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5198                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5199                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5200                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5201                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5202                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5203                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5204                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5205         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5206                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5207                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5208                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5209
5210         WREG32(mmRLC_SERDES_WR_CTRL, data);
5211 }
5212
5213 #define MSG_ENTER_RLC_SAFE_MODE     1
5214 #define MSG_EXIT_RLC_SAFE_MODE      0
5215
5216 #define RLC_GPR_REG2__REQ_MASK           0x00000001
5217 #define RLC_GPR_REG2__MESSAGE__SHIFT     0x00000001
5218 #define RLC_GPR_REG2__MESSAGE_MASK       0x0000001e
5219
5220 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5221 {
5222         u32 data = 0;
5223         unsigned i;
5224
5225         data = RREG32(mmRLC_CNTL);
5226         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5227                 return;
5228
5229         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5230             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5231                                AMD_PG_SUPPORT_GFX_DMG))) {
5232                 data |= RLC_GPR_REG2__REQ_MASK;
5233                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5234                 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5235                 WREG32(mmRLC_GPR_REG2, data);
5236
5237                 for (i = 0; i < adev->usec_timeout; i++) {
5238                         if ((RREG32(mmRLC_GPM_STAT) &
5239                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5240                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5241                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5242                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5243                                 break;
5244                         udelay(1);
5245                 }
5246
5247                 for (i = 0; i < adev->usec_timeout; i++) {
5248                         if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5249                                 break;
5250                         udelay(1);
5251                 }
5252                 adev->gfx.rlc.in_safe_mode = true;
5253         }
5254 }
5255
5256 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5257 {
5258         u32 data;
5259         unsigned i;
5260
5261         data = RREG32(mmRLC_CNTL);
5262         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5263                 return;
5264
5265         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5266             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5267                                AMD_PG_SUPPORT_GFX_DMG))) {
5268                 data |= RLC_GPR_REG2__REQ_MASK;
5269                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5270                 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5271                 WREG32(mmRLC_GPR_REG2, data);
5272                 adev->gfx.rlc.in_safe_mode = false;
5273         }
5274
5275         for (i = 0; i < adev->usec_timeout; i++) {
5276                 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5277                         break;
5278                 udelay(1);
5279         }
5280 }
5281
5282 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5283 {
5284         u32 data;
5285         unsigned i;
5286
5287         data = RREG32(mmRLC_CNTL);
5288         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5289                 return;
5290
5291         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5292                 data |= RLC_SAFE_MODE__CMD_MASK;
5293                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5294                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5295                 WREG32(mmRLC_SAFE_MODE, data);
5296
5297                 for (i = 0; i < adev->usec_timeout; i++) {
5298                         if ((RREG32(mmRLC_GPM_STAT) &
5299                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5300                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5301                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5302                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5303                                 break;
5304                         udelay(1);
5305                 }
5306
5307                 for (i = 0; i < adev->usec_timeout; i++) {
5308                         if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5309                                 break;
5310                         udelay(1);
5311                 }
5312                 adev->gfx.rlc.in_safe_mode = true;
5313         }
5314 }
5315
5316 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5317 {
5318         u32 data = 0;
5319         unsigned i;
5320
5321         data = RREG32(mmRLC_CNTL);
5322         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5323                 return;
5324
5325         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5326                 if (adev->gfx.rlc.in_safe_mode) {
5327                         data |= RLC_SAFE_MODE__CMD_MASK;
5328                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5329                         WREG32(mmRLC_SAFE_MODE, data);
5330                         adev->gfx.rlc.in_safe_mode = false;
5331                 }
5332         }
5333
5334         for (i = 0; i < adev->usec_timeout; i++) {
5335                 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5336                         break;
5337                 udelay(1);
5338         }
5339 }
5340
5341 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5342 {
5343         adev->gfx.rlc.in_safe_mode = true;
5344 }
5345
5346 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5347 {
5348         adev->gfx.rlc.in_safe_mode = false;
5349 }
5350
5351 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5352         .enter_safe_mode = cz_enter_rlc_safe_mode,
5353         .exit_safe_mode = cz_exit_rlc_safe_mode
5354 };
5355
5356 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5357         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5358         .exit_safe_mode = iceland_exit_rlc_safe_mode
5359 };
5360
5361 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5362         .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5363         .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5364 };
5365
5366 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5367                                                       bool enable)
5368 {
5369         uint32_t temp, data;
5370
5371         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5372
5373         /* It is disabled by HW by default */
5374         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5375                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5376                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5377                                 /* 1 - RLC memory Light sleep */
5378                                 temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
5379                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5380                                 if (temp != data)
5381                                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5382                         }
5383
5384                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5385                                 /* 2 - CP memory Light sleep */
5386                                 temp = data = RREG32(mmCP_MEM_SLP_CNTL);
5387                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5388                                 if (temp != data)
5389                                         WREG32(mmCP_MEM_SLP_CNTL, data);
5390                         }
5391                 }
5392
5393                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5394                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5395                 if (adev->flags & AMD_IS_APU)
5396                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5397                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5398                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5399                 else
5400                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5401                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5402                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5403                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5404
5405                 if (temp != data)
5406                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5407
5408                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5409                 gfx_v8_0_wait_for_rlc_serdes(adev);
5410
5411                 /* 5 - clear mgcg override */
5412                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5413
5414                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5415                         /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5416                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5417                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5418                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5419                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5420                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5421                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5422                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5423                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5424                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5425                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5426                         if (temp != data)
5427                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5428                 }
5429                 udelay(50);
5430
5431                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5432                 gfx_v8_0_wait_for_rlc_serdes(adev);
5433         } else {
5434                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5435                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5436                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5437                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5438                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5439                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5440                 if (temp != data)
5441                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5442
5443                 /* 2 - disable MGLS in RLC */
5444                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5445                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5446                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5447                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5448                 }
5449
5450                 /* 3 - disable MGLS in CP */
5451                 data = RREG32(mmCP_MEM_SLP_CNTL);
5452                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5453                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5454                         WREG32(mmCP_MEM_SLP_CNTL, data);
5455                 }
5456
5457                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5458                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5459                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5460                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5461                 if (temp != data)
5462                         WREG32(mmCGTS_SM_CTRL_REG, data);
5463
5464                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5465                 gfx_v8_0_wait_for_rlc_serdes(adev);
5466
5467                 /* 6 - set mgcg override */
5468                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5469
5470                 udelay(50);
5471
5472                 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5473                 gfx_v8_0_wait_for_rlc_serdes(adev);
5474         }
5475
5476         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5477 }
5478
5479 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5480                                                       bool enable)
5481 {
5482         uint32_t temp, temp1, data, data1;
5483
5484         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5485
5486         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5487
5488         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5489                 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
5490                  * Cmp_busy/GFX_Idle interrupts
5491                  */
5492                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5493
5494                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5495                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5496                 if (temp1 != data1)
5497                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5498
5499                 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5500                 gfx_v8_0_wait_for_rlc_serdes(adev);
5501
5502                 /* 3 - clear cgcg override */
5503                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5504
5505                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5506                 gfx_v8_0_wait_for_rlc_serdes(adev);
5507
5508                 /* 4 - write cmd to set CGLS */
5509                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5510
5511                 /* 5 - enable cgcg */
5512                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5513
5514                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5515                         /* enable cgls*/
5516                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5517
5518                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5519                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5520
5521                         if (temp1 != data1)
5522                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5523                 } else {
5524                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5525                 }
5526
5527                 if (temp != data)
5528                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5529         } else {
5530                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5531                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5532
5533                 /* TEST CGCG */
5534                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5535                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5536                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5537                 if (temp1 != data1)
5538                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5539
5540                 /* read gfx register to wake up cgcg */
5541                 RREG32(mmCB_CGTT_SCLK_CTRL);
5542                 RREG32(mmCB_CGTT_SCLK_CTRL);
5543                 RREG32(mmCB_CGTT_SCLK_CTRL);
5544                 RREG32(mmCB_CGTT_SCLK_CTRL);
5545
5546                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5547                 gfx_v8_0_wait_for_rlc_serdes(adev);
5548
5549                 /* write cmd to Set CGCG Overrride */
5550                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5551
5552                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5553                 gfx_v8_0_wait_for_rlc_serdes(adev);
5554
5555                 /* write cmd to Clear CGLS */
5556                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5557
5558                 /* disable cgcg, cgls should be disabled too. */
5559                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5560                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5561                 if (temp != data)
5562                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5563         }
5564
5565         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5566 }
5567 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5568                                             bool enable)
5569 {
5570         if (enable) {
5571                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5572                  * ===  MGCG + MGLS + TS(CG/LS) ===
5573                  */
5574                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5575                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5576         } else {
5577                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5578                  * ===  CGCG + CGLS ===
5579                  */
5580                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5581                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5582         }
5583         return 0;
5584 }
5585
5586 static int gfx_v8_0_set_clockgating_state(void *handle,
5587                                           enum amd_clockgating_state state)
5588 {
5589         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5590
5591         switch (adev->asic_type) {
5592         case CHIP_FIJI:
5593         case CHIP_CARRIZO:
5594         case CHIP_STONEY:
5595                 gfx_v8_0_update_gfx_clock_gating(adev,
5596                                                  state == AMD_CG_STATE_GATE ? true : false);
5597                 break;
5598         default:
5599                 break;
5600         }
5601         return 0;
5602 }
5603
5604 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5605 {
5606         u32 rptr;
5607
5608         rptr = ring->adev->wb.wb[ring->rptr_offs];
5609
5610         return rptr;
5611 }
5612
5613 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5614 {
5615         struct amdgpu_device *adev = ring->adev;
5616         u32 wptr;
5617
5618         if (ring->use_doorbell)
5619                 /* XXX check if swapping is necessary on BE */
5620                 wptr = ring->adev->wb.wb[ring->wptr_offs];
5621         else
5622                 wptr = RREG32(mmCP_RB0_WPTR);
5623
5624         return wptr;
5625 }
5626
5627 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5628 {
5629         struct amdgpu_device *adev = ring->adev;
5630
5631         if (ring->use_doorbell) {
5632                 /* XXX check if swapping is necessary on BE */
5633                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
5634                 WDOORBELL32(ring->doorbell_index, ring->wptr);
5635         } else {
5636                 WREG32(mmCP_RB0_WPTR, ring->wptr);
5637                 (void)RREG32(mmCP_RB0_WPTR);
5638         }
5639 }
5640
5641 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5642 {
5643         u32 ref_and_mask, reg_mem_engine;
5644
5645         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
5646                 switch (ring->me) {
5647                 case 1:
5648                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
5649                         break;
5650                 case 2:
5651                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
5652                         break;
5653                 default:
5654                         return;
5655                 }
5656                 reg_mem_engine = 0;
5657         } else {
5658                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
5659                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
5660         }
5661
5662         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5663         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
5664                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
5665                                  reg_mem_engine));
5666         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
5667         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
5668         amdgpu_ring_write(ring, ref_and_mask);
5669         amdgpu_ring_write(ring, ref_and_mask);
5670         amdgpu_ring_write(ring, 0x20); /* poll interval */
5671 }
5672
5673 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
5674 {
5675         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5676         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5677                                  WRITE_DATA_DST_SEL(0) |
5678                                  WR_CONFIRM));
5679         amdgpu_ring_write(ring, mmHDP_DEBUG0);
5680         amdgpu_ring_write(ring, 0);
5681         amdgpu_ring_write(ring, 1);
5682
5683 }
5684
5685 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5686                                       struct amdgpu_ib *ib,
5687                                       unsigned vm_id, bool ctx_switch)
5688 {
5689         u32 header, control = 0;
5690         u32 next_rptr = ring->wptr + 5;
5691
5692         if (ctx_switch)
5693                 next_rptr += 2;
5694
5695         next_rptr += 4;
5696         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5697         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5698         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5699         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5700         amdgpu_ring_write(ring, next_rptr);
5701
5702         /* insert SWITCH_BUFFER packet before first IB in the ring frame */
5703         if (ctx_switch) {
5704                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5705                 amdgpu_ring_write(ring, 0);
5706         }
5707
5708         if (ib->flags & AMDGPU_IB_FLAG_CE)
5709                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5710         else
5711                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5712
5713         control |= ib->length_dw | (vm_id << 24);
5714
5715         amdgpu_ring_write(ring, header);
5716         amdgpu_ring_write(ring,
5717 #ifdef __BIG_ENDIAN
5718                           (2 << 0) |
5719 #endif
5720                           (ib->gpu_addr & 0xFFFFFFFC));
5721         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5722         amdgpu_ring_write(ring, control);
5723 }
5724
5725 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5726                                           struct amdgpu_ib *ib,
5727                                           unsigned vm_id, bool ctx_switch)
5728 {
5729         u32 header, control = 0;
5730         u32 next_rptr = ring->wptr + 5;
5731
5732         control |= INDIRECT_BUFFER_VALID;
5733
5734         next_rptr += 4;
5735         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5736         amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
5737         amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
5738         amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
5739         amdgpu_ring_write(ring, next_rptr);
5740
5741         header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5742
5743         control |= ib->length_dw | (vm_id << 24);
5744
5745         amdgpu_ring_write(ring, header);
5746         amdgpu_ring_write(ring,
5747 #ifdef __BIG_ENDIAN
5748                                           (2 << 0) |
5749 #endif
5750                                           (ib->gpu_addr & 0xFFFFFFFC));
5751         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5752         amdgpu_ring_write(ring, control);
5753 }
5754
5755 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
5756                                          u64 seq, unsigned flags)
5757 {
5758         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5759         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5760
5761         /* EVENT_WRITE_EOP - flush caches, send int */
5762         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
5763         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5764                                  EOP_TC_ACTION_EN |
5765                                  EOP_TC_WB_ACTION_EN |
5766                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5767                                  EVENT_INDEX(5)));
5768         amdgpu_ring_write(ring, addr & 0xfffffffc);
5769         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
5770                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5771         amdgpu_ring_write(ring, lower_32_bits(seq));
5772         amdgpu_ring_write(ring, upper_32_bits(seq));
5773
5774 }
5775
5776 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5777 {
5778         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5779         uint32_t seq = ring->fence_drv.sync_seq;
5780         uint64_t addr = ring->fence_drv.gpu_addr;
5781
5782         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5783         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
5784                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
5785                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
5786         amdgpu_ring_write(ring, addr & 0xfffffffc);
5787         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
5788         amdgpu_ring_write(ring, seq);
5789         amdgpu_ring_write(ring, 0xffffffff);
5790         amdgpu_ring_write(ring, 4); /* poll interval */
5791
5792         if (usepfp) {
5793                 /* synce CE with ME to prevent CE fetch CEIB before context switch done */
5794                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5795                 amdgpu_ring_write(ring, 0);
5796                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5797                 amdgpu_ring_write(ring, 0);
5798         }
5799 }
5800
5801 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5802                                         unsigned vm_id, uint64_t pd_addr)
5803 {
5804         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5805
5806         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5807         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5808                                  WRITE_DATA_DST_SEL(0)) |
5809                                  WR_CONFIRM);
5810         if (vm_id < 8) {
5811                 amdgpu_ring_write(ring,
5812                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
5813         } else {
5814                 amdgpu_ring_write(ring,
5815                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
5816         }
5817         amdgpu_ring_write(ring, 0);
5818         amdgpu_ring_write(ring, pd_addr >> 12);
5819
5820         /* bits 0-15 are the VM contexts0-15 */
5821         /* invalidate the cache */
5822         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5823         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5824                                  WRITE_DATA_DST_SEL(0)));
5825         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
5826         amdgpu_ring_write(ring, 0);
5827         amdgpu_ring_write(ring, 1 << vm_id);
5828
5829         /* wait for the invalidate to complete */
5830         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5831         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5832                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
5833                                  WAIT_REG_MEM_ENGINE(0))); /* me */
5834         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
5835         amdgpu_ring_write(ring, 0);
5836         amdgpu_ring_write(ring, 0); /* ref */
5837         amdgpu_ring_write(ring, 0); /* mask */
5838         amdgpu_ring_write(ring, 0x20); /* poll interval */
5839
5840         /* compute doesn't have PFP */
5841         if (usepfp) {
5842                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5843                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5844                 amdgpu_ring_write(ring, 0x0);
5845                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5846                 amdgpu_ring_write(ring, 0);
5847                 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5848                 amdgpu_ring_write(ring, 0);
5849         }
5850 }
5851
5852 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5853 {
5854         return ring->adev->wb.wb[ring->rptr_offs];
5855 }
5856
5857 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5858 {
5859         return ring->adev->wb.wb[ring->wptr_offs];
5860 }
5861
5862 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5863 {
5864         struct amdgpu_device *adev = ring->adev;
5865
5866         /* XXX check if swapping is necessary on BE */
5867         adev->wb.wb[ring->wptr_offs] = ring->wptr;
5868         WDOORBELL32(ring->doorbell_index, ring->wptr);
5869 }
5870
5871 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
5872                                              u64 addr, u64 seq,
5873                                              unsigned flags)
5874 {
5875         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5876         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5877
5878         /* RELEASE_MEM - flush caches, send int */
5879         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
5880         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5881                                  EOP_TC_ACTION_EN |
5882                                  EOP_TC_WB_ACTION_EN |
5883                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5884                                  EVENT_INDEX(5)));
5885         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5886         amdgpu_ring_write(ring, addr & 0xfffffffc);
5887         amdgpu_ring_write(ring, upper_32_bits(addr));
5888         amdgpu_ring_write(ring, lower_32_bits(seq));
5889         amdgpu_ring_write(ring, upper_32_bits(seq));
5890 }
5891
5892 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5893                                                  enum amdgpu_interrupt_state state)
5894 {
5895         u32 cp_int_cntl;
5896
5897         switch (state) {
5898         case AMDGPU_IRQ_STATE_DISABLE:
5899                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5900                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5901                                             TIME_STAMP_INT_ENABLE, 0);
5902                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5903                 break;
5904         case AMDGPU_IRQ_STATE_ENABLE:
5905                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5906                 cp_int_cntl =
5907                         REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5908                                       TIME_STAMP_INT_ENABLE, 1);
5909                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5910                 break;
5911         default:
5912                 break;
5913         }
5914 }
5915
5916 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5917                                                      int me, int pipe,
5918                                                      enum amdgpu_interrupt_state state)
5919 {
5920         u32 mec_int_cntl, mec_int_cntl_reg;
5921
5922         /*
5923          * amdgpu controls only pipe 0 of MEC1. That's why this function only
5924          * handles the setting of interrupts for this specific pipe. All other
5925          * pipes' interrupts are set by amdkfd.
5926          */
5927
5928         if (me == 1) {
5929                 switch (pipe) {
5930                 case 0:
5931                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
5932                         break;
5933                 default:
5934                         DRM_DEBUG("invalid pipe %d\n", pipe);
5935                         return;
5936                 }
5937         } else {
5938                 DRM_DEBUG("invalid me %d\n", me);
5939                 return;
5940         }
5941
5942         switch (state) {
5943         case AMDGPU_IRQ_STATE_DISABLE:
5944                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5945                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5946                                              TIME_STAMP_INT_ENABLE, 0);
5947                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5948                 break;
5949         case AMDGPU_IRQ_STATE_ENABLE:
5950                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5951                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5952                                              TIME_STAMP_INT_ENABLE, 1);
5953                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5954                 break;
5955         default:
5956                 break;
5957         }
5958 }
5959
5960 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5961                                              struct amdgpu_irq_src *source,
5962                                              unsigned type,
5963                                              enum amdgpu_interrupt_state state)
5964 {
5965         u32 cp_int_cntl;
5966
5967         switch (state) {
5968         case AMDGPU_IRQ_STATE_DISABLE:
5969                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5970                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5971                                             PRIV_REG_INT_ENABLE, 0);
5972                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5973                 break;
5974         case AMDGPU_IRQ_STATE_ENABLE:
5975                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5976                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5977                                             PRIV_REG_INT_ENABLE, 1);
5978                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5979                 break;
5980         default:
5981                 break;
5982         }
5983
5984         return 0;
5985 }
5986
5987 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5988                                               struct amdgpu_irq_src *source,
5989                                               unsigned type,
5990                                               enum amdgpu_interrupt_state state)
5991 {
5992         u32 cp_int_cntl;
5993
5994         switch (state) {
5995         case AMDGPU_IRQ_STATE_DISABLE:
5996                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5997                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5998                                             PRIV_INSTR_INT_ENABLE, 0);
5999                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6000                 break;
6001         case AMDGPU_IRQ_STATE_ENABLE:
6002                 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6003                 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6004                                             PRIV_INSTR_INT_ENABLE, 1);
6005                 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6006                 break;
6007         default:
6008                 break;
6009         }
6010
6011         return 0;
6012 }
6013
6014 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6015                                             struct amdgpu_irq_src *src,
6016                                             unsigned type,
6017                                             enum amdgpu_interrupt_state state)
6018 {
6019         switch (type) {
6020         case AMDGPU_CP_IRQ_GFX_EOP:
6021                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6022                 break;
6023         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6024                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6025                 break;
6026         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6027                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6028                 break;
6029         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6030                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6031                 break;
6032         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6033                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6034                 break;
6035         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6036                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6037                 break;
6038         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6039                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6040                 break;
6041         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6042                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6043                 break;
6044         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6045                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6046                 break;
6047         default:
6048                 break;
6049         }
6050         return 0;
6051 }
6052
6053 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6054                             struct amdgpu_irq_src *source,
6055                             struct amdgpu_iv_entry *entry)
6056 {
6057         int i;
6058         u8 me_id, pipe_id, queue_id;
6059         struct amdgpu_ring *ring;
6060
6061         DRM_DEBUG("IH: CP EOP\n");
6062         me_id = (entry->ring_id & 0x0c) >> 2;
6063         pipe_id = (entry->ring_id & 0x03) >> 0;
6064         queue_id = (entry->ring_id & 0x70) >> 4;
6065
6066         switch (me_id) {
6067         case 0:
6068                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6069                 break;
6070         case 1:
6071         case 2:
6072                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6073                         ring = &adev->gfx.compute_ring[i];
6074                         /* Per-queue interrupt is supported for MEC starting from VI.
6075                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6076                           */
6077                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6078                                 amdgpu_fence_process(ring);
6079                 }
6080                 break;
6081         }
6082         return 0;
6083 }
6084
6085 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6086                                  struct amdgpu_irq_src *source,
6087                                  struct amdgpu_iv_entry *entry)
6088 {
6089         DRM_ERROR("Illegal register access in command stream\n");
6090         schedule_work(&adev->reset_work);
6091         return 0;
6092 }
6093
6094 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6095                                   struct amdgpu_irq_src *source,
6096                                   struct amdgpu_iv_entry *entry)
6097 {
6098         DRM_ERROR("Illegal instruction in command stream\n");
6099         schedule_work(&adev->reset_work);
6100         return 0;
6101 }
6102
6103 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6104         .name = "gfx_v8_0",
6105         .early_init = gfx_v8_0_early_init,
6106         .late_init = gfx_v8_0_late_init,
6107         .sw_init = gfx_v8_0_sw_init,
6108         .sw_fini = gfx_v8_0_sw_fini,
6109         .hw_init = gfx_v8_0_hw_init,
6110         .hw_fini = gfx_v8_0_hw_fini,
6111         .suspend = gfx_v8_0_suspend,
6112         .resume = gfx_v8_0_resume,
6113         .is_idle = gfx_v8_0_is_idle,
6114         .wait_for_idle = gfx_v8_0_wait_for_idle,
6115         .soft_reset = gfx_v8_0_soft_reset,
6116         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6117         .set_powergating_state = gfx_v8_0_set_powergating_state,
6118 };
6119
6120 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6121         .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
6122         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6123         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6124         .parse_cs = NULL,
6125         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6126         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6127         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6128         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6129         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6130         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6131         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6132         .test_ring = gfx_v8_0_ring_test_ring,
6133         .test_ib = gfx_v8_0_ring_test_ib,
6134         .insert_nop = amdgpu_ring_insert_nop,
6135         .pad_ib = amdgpu_ring_generic_pad_ib,
6136 };
6137
6138 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6139         .get_rptr = gfx_v8_0_ring_get_rptr_compute,
6140         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6141         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6142         .parse_cs = NULL,
6143         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6144         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6145         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6146         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6147         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6148         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6149         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6150         .test_ring = gfx_v8_0_ring_test_ring,
6151         .test_ib = gfx_v8_0_ring_test_ib,
6152         .insert_nop = amdgpu_ring_insert_nop,
6153         .pad_ib = amdgpu_ring_generic_pad_ib,
6154 };
6155
6156 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6157 {
6158         int i;
6159
6160         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6161                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6162
6163         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6164                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6165 }
6166
6167 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6168         .set = gfx_v8_0_set_eop_interrupt_state,
6169         .process = gfx_v8_0_eop_irq,
6170 };
6171
6172 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6173         .set = gfx_v8_0_set_priv_reg_fault_state,
6174         .process = gfx_v8_0_priv_reg_irq,
6175 };
6176
6177 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6178         .set = gfx_v8_0_set_priv_inst_fault_state,
6179         .process = gfx_v8_0_priv_inst_irq,
6180 };
6181
6182 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6183 {
6184         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6185         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6186
6187         adev->gfx.priv_reg_irq.num_types = 1;
6188         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6189
6190         adev->gfx.priv_inst_irq.num_types = 1;
6191         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6192 }
6193
6194 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6195 {
6196         switch (adev->asic_type) {
6197         case CHIP_TOPAZ:
6198         case CHIP_STONEY:
6199                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6200                 break;
6201         case CHIP_CARRIZO:
6202                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6203                 break;
6204         default:
6205                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6206                 break;
6207         }
6208 }
6209
6210 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6211 {
6212         /* init asci gds info */
6213         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6214         adev->gds.gws.total_size = 64;
6215         adev->gds.oa.total_size = 16;
6216
6217         if (adev->gds.mem.total_size == 64 * 1024) {
6218                 adev->gds.mem.gfx_partition_size = 4096;
6219                 adev->gds.mem.cs_partition_size = 4096;
6220
6221                 adev->gds.gws.gfx_partition_size = 4;
6222                 adev->gds.gws.cs_partition_size = 4;
6223
6224                 adev->gds.oa.gfx_partition_size = 4;
6225                 adev->gds.oa.cs_partition_size = 1;
6226         } else {
6227                 adev->gds.mem.gfx_partition_size = 1024;
6228                 adev->gds.mem.cs_partition_size = 1024;
6229
6230                 adev->gds.gws.gfx_partition_size = 16;
6231                 adev->gds.gws.cs_partition_size = 16;
6232
6233                 adev->gds.oa.gfx_partition_size = 4;
6234                 adev->gds.oa.cs_partition_size = 4;
6235         }
6236 }
6237
6238 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6239 {
6240         u32 data, mask;
6241
6242         data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
6243         data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6244
6245         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6246         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6247
6248         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6249
6250         return (~data) & mask;
6251 }
6252
6253 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6254 {
6255         int i, j, k, counter, active_cu_number = 0;
6256         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6257         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6258
6259         memset(cu_info, 0, sizeof(*cu_info));
6260
6261         mutex_lock(&adev->grbm_idx_mutex);
6262         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6263                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6264                         mask = 1;
6265                         ao_bitmap = 0;
6266                         counter = 0;
6267                         gfx_v8_0_select_se_sh(adev, i, j);
6268                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6269                         cu_info->bitmap[i][j] = bitmap;
6270
6271                         for (k = 0; k < 16; k ++) {
6272                                 if (bitmap & mask) {
6273                                         if (counter < 2)
6274                                                 ao_bitmap |= mask;
6275                                         counter ++;
6276                                 }
6277                                 mask <<= 1;
6278                         }
6279                         active_cu_number += counter;
6280                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6281                 }
6282         }
6283         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
6284         mutex_unlock(&adev->grbm_idx_mutex);
6285
6286         cu_info->number = active_cu_number;
6287         cu_info->ao_cu_mask = ao_cu_mask;
6288 }