/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

94 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
100
101 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
113
114 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
119
120 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
126
127 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
133
134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140
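/*
 * Per-VMID GDS memory, GWS and OA register offsets (16 VMIDs); used when
 * emitting a GDS partition switch for a given VMID.
 */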
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

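/*
 * The "golden" tables below are register init sequences consumed by
 * amdgpu_program_register_sequence().  Each entry is a triplet of
 * { register offset, AND mask, OR value }; roughly, for each triplet the
 * helper does:
 *
 *     if (and_mask == 0xffffffff)
 *             tmp = or_value;
 *     else
 *             tmp = (RREG32(reg) & ~and_mask) | or_value;
 *     WREG32(reg, tmp);
 *
 * i.e. a mask of 0xffffffff overwrites the register outright, anything
 * else is a read-modify-write of only the masked bits.
 */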
static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);

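/*
 * Apply the per-ASIC "golden" register sequences defined above.  For
 * Polaris10 this also writes ixCG_ACLK_CNTL through the SMC and, on a few
 * specific board SKUs, issues a pair of AtomBIOS I2C transactions
 * (apparently a board-level workaround).
 */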
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                amdgpu_program_register_sequence(adev,
                                                 iceland_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_iceland_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
                amdgpu_program_register_sequence(adev,
                                                 iceland_golden_common_all,
                                                 (const u32)ARRAY_SIZE(iceland_golden_common_all));
                break;
        case CHIP_FIJI:
                amdgpu_program_register_sequence(adev,
                                                 fiji_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_fiji_a10,
                                                 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
                amdgpu_program_register_sequence(adev,
                                                 fiji_golden_common_all,
                                                 (const u32)ARRAY_SIZE(fiji_golden_common_all));
                break;

        case CHIP_TONGA:
                amdgpu_program_register_sequence(adev,
                                                 tonga_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_tonga_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
                amdgpu_program_register_sequence(adev,
                                                 tonga_golden_common_all,
                                                 (const u32)ARRAY_SIZE(tonga_golden_common_all));
                break;
        case CHIP_POLARIS11:
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_polaris11_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
                amdgpu_program_register_sequence(adev,
                                                 polaris11_golden_common_all,
                                                 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
                break;
        case CHIP_POLARIS10:
                amdgpu_program_register_sequence(adev,
                                                 golden_settings_polaris10_a11,
                                                 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
                amdgpu_program_register_sequence(adev,
                                                 polaris10_golden_common_all,
                                                 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
                WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
                if (adev->pdev->revision == 0xc7 &&
                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
                }
                break;
        case CHIP_CARRIZO:
                amdgpu_program_register_sequence(adev,
                                                 cz_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 cz_golden_settings_a11,
                                                 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
                amdgpu_program_register_sequence(adev,
                                                 cz_golden_common_all,
                                                 (const u32)ARRAY_SIZE(cz_golden_common_all));
                break;
        case CHIP_STONEY:
                amdgpu_program_register_sequence(adev,
                                                 stoney_mgcg_cgcg_init,
                                                 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
                amdgpu_program_register_sequence(adev,
                                                 stoney_golden_settings_a11,
                                                 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
                amdgpu_program_register_sequence(adev,
                                                 stoney_golden_common_all,
                                                 (const u32)ARRAY_SIZE(stoney_golden_common_all));
                break;
        default:
                break;
        }
}

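/*
 * Set up the pool of CP scratch registers (mmSCRATCH_REG0 onward) used by
 * the ring and IB tests below; amdgpu_gfx_scratch_get()/_free() hand them
 * out from this pool.
 */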
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
        int i;

        adev->gfx.scratch.num_reg = 7;
        adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
        for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
                adev->gfx.scratch.free[i] = true;
                adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
        }
}

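/*
 * Basic ring sanity test: write a magic value to a scratch register
 * through the ring (SET_UCONFIG_REG) and poll the register until the
 * value shows up or the usec timeout expires.
 */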
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
                return r;
        }
        WREG32(scratch, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 3);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
                          ring->idx, r);
                amdgpu_gfx_scratch_free(adev, scratch);
                return r;
        }
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);

        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i < adev->usec_timeout) {
                DRM_INFO("ring test on %d succeeded in %d usecs\n",
                         ring->idx, i);
        } else {
                DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
                          ring->idx, scratch, tmp);
                r = -EINVAL;
        }
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}

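/*
 * Same idea as the ring test, but the scratch write is issued from an
 * indirect buffer and completion is detected by waiting on its fence,
 * which exercises the whole IB submission path.
 */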
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct fence *f = NULL;
        uint32_t scratch;
        uint32_t tmp = 0;
        long r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
                return r;
        }
        WREG32(scratch, 0xCAFEDEAD);
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, 256, &ib);
        if (r) {
                DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
                goto err1;
        }
        ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
        ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
        ib.ptr[2] = 0xDEADBEEF;
        ib.length_dw = 3;

        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
        if (r)
                goto err2;

        r = fence_wait_timeout(f, false, timeout);
        if (r == 0) {
                DRM_ERROR("amdgpu: IB test timed out.\n");
                r = -ETIMEDOUT;
                goto err2;
        } else if (r < 0) {
                DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
                goto err2;
        }
        tmp = RREG32(scratch);
        if (tmp == 0xDEADBEEF) {
                DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
                r = 0;
        } else {
                DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
                          scratch, tmp);
                r = -EINVAL;
        }
err2:
        amdgpu_ib_free(adev, &ib, NULL);
        fence_put(f);
err1:
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}

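/* Release the firmware images requested by gfx_v8_0_init_microcode(). */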
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
        release_firmware(adev->gfx.pfp_fw);
        adev->gfx.pfp_fw = NULL;
        release_firmware(adev->gfx.me_fw);
        adev->gfx.me_fw = NULL;
        release_firmware(adev->gfx.ce_fw);
        adev->gfx.ce_fw = NULL;
        release_firmware(adev->gfx.rlc_fw);
        adev->gfx.rlc_fw = NULL;
        release_firmware(adev->gfx.mec_fw);
        adev->gfx.mec_fw = NULL;
        if ((adev->asic_type != CHIP_STONEY) &&
            (adev->asic_type != CHIP_TOPAZ))
                release_firmware(adev->gfx.mec2_fw);
        adev->gfx.mec2_fw = NULL;

        kfree(adev->gfx.rlc.register_list_format);
}

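/*
 * Request and validate the CP (PFP/ME/CE/MEC, optionally MEC2) and RLC
 * firmware images for the current ASIC, cache their version/feature
 * fields and the RLC register lists, and register the images with the
 * SMU firmware loader when it is in use.
 */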
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
        const char *chip_name;
        char fw_name[30];
        int err;
        struct amdgpu_firmware_info *info = NULL;
        const struct common_firmware_header *header = NULL;
        const struct gfx_firmware_header_v1_0 *cp_hdr;
        const struct rlc_firmware_header_v2_0 *rlc_hdr;
        unsigned int *tmp = NULL, i;

        DRM_DEBUG("\n");

        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                chip_name = "topaz";
                break;
        case CHIP_TONGA:
                chip_name = "tonga";
                break;
        case CHIP_CARRIZO:
                chip_name = "carrizo";
                break;
        case CHIP_FIJI:
                chip_name = "fiji";
                break;
        case CHIP_POLARIS11:
                chip_name = "polaris11";
                break;
        case CHIP_POLARIS10:
                chip_name = "polaris10";
                break;
        case CHIP_STONEY:
                chip_name = "stoney";
                break;
        default:
                BUG();
        }

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.me_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.ce_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
        adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
        err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        if (err)
                goto out;
        rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
        adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
        adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

        adev->gfx.rlc.save_and_restore_offset =
                        le32_to_cpu(rlc_hdr->save_and_restore_offset);
        adev->gfx.rlc.clear_state_descriptor_offset =
                        le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
        adev->gfx.rlc.avail_scratch_ram_locations =
                        le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
        adev->gfx.rlc.reg_restore_list_size =
                        le32_to_cpu(rlc_hdr->reg_restore_list_size);
        adev->gfx.rlc.reg_list_format_start =
                        le32_to_cpu(rlc_hdr->reg_list_format_start);
        adev->gfx.rlc.reg_list_format_separate_start =
                        le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
        adev->gfx.rlc.starting_offsets_start =
                        le32_to_cpu(rlc_hdr->starting_offsets_start);
        adev->gfx.rlc.reg_list_format_size_bytes =
                        le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
        adev->gfx.rlc.reg_list_size_bytes =
                        le32_to_cpu(rlc_hdr->reg_list_size_bytes);

        adev->gfx.rlc.register_list_format =
                        kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
                                        adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

        if (!adev->gfx.rlc.register_list_format) {
                err = -ENOMEM;
                goto out;
        }

        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
                        le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
        for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
                adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

        adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
                        le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
        for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
                adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.mec_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        if ((adev->asic_type != CHIP_STONEY) &&
            (adev->asic_type != CHIP_TOPAZ)) {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
                err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
                if (!err) {
                        err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
                        if (err)
                                goto out;
                        cp_hdr = (const struct gfx_firmware_header_v1_0 *)
                                adev->gfx.mec2_fw->data;
                        adev->gfx.mec2_fw_version =
                                le32_to_cpu(cp_hdr->header.ucode_version);
                        adev->gfx.mec2_feature_version =
                                le32_to_cpu(cp_hdr->ucode_feature_version);
                } else {
                        /* MEC2 firmware is optional; fall back to MEC1 only. */
                        err = 0;
                        adev->gfx.mec2_fw = NULL;
                }
        }

        if (adev->firmware.smu_load) {
                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
                info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
                info->fw = adev->gfx.pfp_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
                info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
                info->fw = adev->gfx.me_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
                info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
                info->fw = adev->gfx.ce_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
                info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
                info->fw = adev->gfx.rlc_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
                info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
                info->fw = adev->gfx.mec_fw;
                header = (const struct common_firmware_header *)info->fw->data;
                adev->firmware.fw_size +=
                        ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

                if (adev->gfx.mec2_fw) {
                        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
                        info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
                        info->fw = adev->gfx.mec2_fw;
                        header = (const struct common_firmware_header *)info->fw->data;
                        adev->firmware.fw_size +=
                                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
                }

        }

out:
        if (err) {
                dev_err(adev->dev,
                        "gfx8: Failed to load firmware \"%s\"\n",
                        fw_name);
                release_firmware(adev->gfx.pfp_fw);
                adev->gfx.pfp_fw = NULL;
                release_firmware(adev->gfx.me_fw);
                adev->gfx.me_fw = NULL;
                release_firmware(adev->gfx.ce_fw);
                adev->gfx.ce_fw = NULL;
                release_firmware(adev->gfx.rlc_fw);
                adev->gfx.rlc_fw = NULL;
                release_firmware(adev->gfx.mec_fw);
                adev->gfx.mec_fw = NULL;
                release_firmware(adev->gfx.mec2_fw);
                adev->gfx.mec2_fw = NULL;
        }
        return err;
}

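/*
 * Build the clear state buffer (CSB) for the RLC: a PM4 stream that
 * brackets a CLEAR_STATE preamble, replays the SECT_CONTEXT sections from
 * the clearstate tables, and programs the per-ASIC PA_SC_RASTER_CONFIG
 * values.
 */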
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
                                    volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        if (adev->gfx.rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        buffer[count++] = cpu_to_le32(0x80000000);
        buffer[count++] = cpu_to_le32(0x80000000);

        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                buffer[count++] =
                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
                                buffer[count++] = cpu_to_le32(ext->reg_index -
                                                PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
                        } else {
                                return;
                        }
                }
        }

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
                        PACKET3_SET_CONTEXT_REG_START);
        switch (adev->asic_type) {
        case CHIP_TONGA:
        case CHIP_POLARIS10:
                buffer[count++] = cpu_to_le32(0x16000012);
                buffer[count++] = cpu_to_le32(0x0000002A);
                break;
        case CHIP_POLARIS11:
                buffer[count++] = cpu_to_le32(0x16000012);
                buffer[count++] = cpu_to_le32(0x00000000);
                break;
        case CHIP_FIJI:
                buffer[count++] = cpu_to_le32(0x3a00161a);
                buffer[count++] = cpu_to_le32(0x0000002e);
                break;
        case CHIP_TOPAZ:
        case CHIP_CARRIZO:
                buffer[count++] = cpu_to_le32(0x00000002);
                buffer[count++] = cpu_to_le32(0x00000000);
                break;
        case CHIP_STONEY:
                buffer[count++] = cpu_to_le32(0x00000000);
                buffer[count++] = cpu_to_le32(0x00000000);
                break;
        default:
                buffer[count++] = cpu_to_le32(0x00000000);
                buffer[count++] = cpu_to_le32(0x00000000);
                break;
        }

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
        buffer[count++] = cpu_to_le32(0);
}

1165 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1166 {
1167         const __le32 *fw_data;
1168         volatile u32 *dst_ptr;
1169         int me, i, max_me = 4;
1170         u32 bo_offset = 0;
1171         u32 table_offset, table_size;
1172
1173         if (adev->asic_type == CHIP_CARRIZO)
1174                 max_me = 5;
1175
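             /*
              * Each CP microcode image (CE, PFP, ME, MEC and, on Carrizo,
              * MEC2) carries an RLC jump table of jt_size dwords at
              * jt_offset; the tables are copied back to back into the
              * cp_table BO below.
              */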
1176         /* write the cp table buffer */
1177         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1178         for (me = 0; me < max_me; me++) {
1179                 if (me == 0) {
1180                         const struct gfx_firmware_header_v1_0 *hdr =
1181                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1182                         fw_data = (const __le32 *)
1183                                 (adev->gfx.ce_fw->data +
1184                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1185                         table_offset = le32_to_cpu(hdr->jt_offset);
1186                         table_size = le32_to_cpu(hdr->jt_size);
1187                 } else if (me == 1) {
1188                         const struct gfx_firmware_header_v1_0 *hdr =
1189                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1190                         fw_data = (const __le32 *)
1191                                 (adev->gfx.pfp_fw->data +
1192                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1193                         table_offset = le32_to_cpu(hdr->jt_offset);
1194                         table_size = le32_to_cpu(hdr->jt_size);
1195                 } else if (me == 2) {
1196                         const struct gfx_firmware_header_v1_0 *hdr =
1197                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1198                         fw_data = (const __le32 *)
1199                                 (adev->gfx.me_fw->data +
1200                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1201                         table_offset = le32_to_cpu(hdr->jt_offset);
1202                         table_size = le32_to_cpu(hdr->jt_size);
1203                 } else if (me == 3) {
1204                         const struct gfx_firmware_header_v1_0 *hdr =
1205                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1206                         fw_data = (const __le32 *)
1207                                 (adev->gfx.mec_fw->data +
1208                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1209                         table_offset = le32_to_cpu(hdr->jt_offset);
1210                         table_size = le32_to_cpu(hdr->jt_size);
1211                 } else if (me == 4) {
1212                         const struct gfx_firmware_header_v1_0 *hdr =
1213                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1214                         fw_data = (const __le32 *)
1215                                 (adev->gfx.mec2_fw->data +
1216                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1217                         table_offset = le32_to_cpu(hdr->jt_offset);
1218                         table_size = le32_to_cpu(hdr->jt_size);
1219                 }
1220
1221                 for (i = 0; i < table_size; i++) {
1222                         dst_ptr[bo_offset + i] =
1223                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1224                 }
1225
1226                 bo_offset += table_size;
1227         }
1228 }
1229
1230 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1231 {
1232         int r;
1233
1234         /* clear state block */
1235         if (adev->gfx.rlc.clear_state_obj) {
1236                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1237                 if (unlikely(r != 0))
1238                         dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1239                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1240                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1241                 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1242                 adev->gfx.rlc.clear_state_obj = NULL;
1243         }
1244
1245         /* jump table block */
1246         if (adev->gfx.rlc.cp_table_obj) {
1247                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1248                 if (unlikely(r != 0))
1249                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1250                 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1251                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1252                 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1253                 adev->gfx.rlc.cp_table_obj = NULL;
1254         }
1255 }
1256
1257 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1258 {
1259         volatile u32 *dst_ptr;
1260         u32 dws;
1261         const struct cs_section_def *cs_data;
1262         int r;
1263
1264         adev->gfx.rlc.cs_data = vi_cs_data;
1265
1266         cs_data = adev->gfx.rlc.cs_data;
1267
1268         if (cs_data) {
1269                 /* clear state block */
1270                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1271
1272                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1273                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1274                                              AMDGPU_GEM_DOMAIN_VRAM,
1275                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1276                                              NULL, NULL,
1277                                              &adev->gfx.rlc.clear_state_obj);
1278                         if (r) {
1279                                 dev_warn(adev->dev, "(%d) create RLC cbs bo failed\n", r);
1280                                 gfx_v8_0_rlc_fini(adev);
1281                                 return r;
1282                         }
1283                 }
1284                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1285                 if (unlikely(r != 0)) {
1286                         gfx_v8_0_rlc_fini(adev);
1287                         return r;
1288                 }
1289                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1290                                   &adev->gfx.rlc.clear_state_gpu_addr);
1291                 if (r) {
1292                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1293                         dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1294                         gfx_v8_0_rlc_fini(adev);
1295                         return r;
1296                 }
1297
1298                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1299                 if (r) {
1300                         dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1301                         gfx_v8_0_rlc_fini(adev);
1302                         return r;
1303                 }
1304                 /* set up the cs buffer */
1305                 dst_ptr = adev->gfx.rlc.cs_ptr;
1306                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1307                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1308                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1309         }
1310
1311         if ((adev->asic_type == CHIP_CARRIZO) ||
1312             (adev->asic_type == CHIP_STONEY)) {
1313                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* room for 5 JTs of 96 dwords each, 2KB aligned, + 64KB GDS */
1314                 if (adev->gfx.rlc.cp_table_obj == NULL) {
1315                         r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1316                                              AMDGPU_GEM_DOMAIN_VRAM,
1317                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1318                                              NULL, NULL,
1319                                              &adev->gfx.rlc.cp_table_obj);
1320                         if (r) {
1321                                 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1322                                 return r;
1323                         }
1324                 }
1325
1326                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1327                 if (unlikely(r != 0)) {
1328                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1329                         return r;
1330                 }
1331                 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1332                                   &adev->gfx.rlc.cp_table_gpu_addr);
1333                 if (r) {
1334                         amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1335                         dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1336                         return r;
1337                 }
1338                 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1339                 if (r) {
1340                         dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1341                         return r;
1342                 }
1343
1344                 cz_init_cp_jump_table(adev);
1345
1346                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1347                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1348         }
1349
1350         return 0;
1351 }
1352
1353 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1354 {
1355         int r;
1356
1357         if (adev->gfx.mec.hpd_eop_obj) {
1358                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1359                 if (unlikely(r != 0))
1360                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1361                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1362                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1363                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1364                 adev->gfx.mec.hpd_eop_obj = NULL;
1365         }
1366 }
1367
1368 #define MEC_HPD_SIZE 2048
1369
1370 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1371 {
1372         int r;
1373         u32 *hpd;
1374
1375         /*
1376          * we assign only 1 pipe because all other pipes will
1377          * be handled by KFD
1378          */
1379         adev->gfx.mec.num_mec = 1;
1380         adev->gfx.mec.num_pipe = 1;
1381         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1382
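             /*
              * One HPD EOP area of MEC_HPD_SIZE * 2 bytes is reserved per
              * managed pipe (only MEC1 pipe 0 here); the buffer lives in GTT
              * and is zeroed after pinning.
              */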
1383         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1384                 r = amdgpu_bo_create(adev,
1385                                      adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1386                                      PAGE_SIZE, true,
1387                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1388                                      &adev->gfx.mec.hpd_eop_obj);
1389                 if (r) {
1390                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1391                         return r;
1392                 }
1393         }
1394
1395         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1396         if (unlikely(r != 0)) {
1397                 gfx_v8_0_mec_fini(adev);
1398                 return r;
1399         }
1400         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1401                           &adev->gfx.mec.hpd_eop_gpu_addr);
1402         if (r) {
1403                 dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
1404                 gfx_v8_0_mec_fini(adev);
1405                 return r;
1406         }
1407         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1408         if (r) {
1409                 dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
1410                 gfx_v8_0_mec_fini(adev);
1411                 return r;
1412         }
1413
1414         memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1415
1416         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1417         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1418
1419         return 0;
1420 }
1421
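     /*
      * Hand-assembled GCN ISA blobs dispatched by
      * gfx_v8_0_do_edc_gpr_workarounds() below to initialize the VGPR and
      * SGPR register files before EDC is enabled on Carrizo.
      */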
1422 static const u32 vgpr_init_compute_shader[] =
1423 {
1424         0x7e000209, 0x7e020208,
1425         0x7e040207, 0x7e060206,
1426         0x7e080205, 0x7e0a0204,
1427         0x7e0c0203, 0x7e0e0202,
1428         0x7e100201, 0x7e120200,
1429         0x7e140209, 0x7e160208,
1430         0x7e180207, 0x7e1a0206,
1431         0x7e1c0205, 0x7e1e0204,
1432         0x7e200203, 0x7e220202,
1433         0x7e240201, 0x7e260200,
1434         0x7e280209, 0x7e2a0208,
1435         0x7e2c0207, 0x7e2e0206,
1436         0x7e300205, 0x7e320204,
1437         0x7e340203, 0x7e360202,
1438         0x7e380201, 0x7e3a0200,
1439         0x7e3c0209, 0x7e3e0208,
1440         0x7e400207, 0x7e420206,
1441         0x7e440205, 0x7e460204,
1442         0x7e480203, 0x7e4a0202,
1443         0x7e4c0201, 0x7e4e0200,
1444         0x7e500209, 0x7e520208,
1445         0x7e540207, 0x7e560206,
1446         0x7e580205, 0x7e5a0204,
1447         0x7e5c0203, 0x7e5e0202,
1448         0x7e600201, 0x7e620200,
1449         0x7e640209, 0x7e660208,
1450         0x7e680207, 0x7e6a0206,
1451         0x7e6c0205, 0x7e6e0204,
1452         0x7e700203, 0x7e720202,
1453         0x7e740201, 0x7e760200,
1454         0x7e780209, 0x7e7a0208,
1455         0x7e7c0207, 0x7e7e0206,
1456         0xbf8a0000, 0xbf810000,
1457 };
1458
1459 static const u32 sgpr_init_compute_shader[] =
1460 {
1461         0xbe8a0100, 0xbe8c0102,
1462         0xbe8e0104, 0xbe900106,
1463         0xbe920108, 0xbe940100,
1464         0xbe960102, 0xbe980104,
1465         0xbe9a0106, 0xbe9c0108,
1466         0xbe9e0100, 0xbea00102,
1467         0xbea20104, 0xbea40106,
1468         0xbea60108, 0xbea80100,
1469         0xbeaa0102, 0xbeac0104,
1470         0xbeae0106, 0xbeb00108,
1471         0xbeb20100, 0xbeb40102,
1472         0xbeb60104, 0xbeb80106,
1473         0xbeba0108, 0xbebc0100,
1474         0xbebe0102, 0xbec00104,
1475         0xbec20106, 0xbec40108,
1476         0xbec60100, 0xbec80102,
1477         0xbee60004, 0xbee70005,
1478         0xbeea0006, 0xbeeb0007,
1479         0xbee80008, 0xbee90009,
1480         0xbefc0000, 0xbf8a0000,
1481         0xbf810000, 0x00000000,
1482 };
1483
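     /*
      * Register/value pairs consumed two at a time by the dispatch setup
      * loops below; the COMPUTE_STATIC_THREAD_MGMT_SE0 masks appear to steer
      * each pass to a different set of CUs.
      */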
1484 static const u32 vgpr_init_regs[] =
1485 {
1486         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1487         mmCOMPUTE_RESOURCE_LIMITS, 0,
1488         mmCOMPUTE_NUM_THREAD_X, 256*4,
1489         mmCOMPUTE_NUM_THREAD_Y, 1,
1490         mmCOMPUTE_NUM_THREAD_Z, 1,
1491         mmCOMPUTE_PGM_RSRC2, 20,
1492         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1493         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1494         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1495         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1496         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1497         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1498         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1499         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1500         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1501         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1502 };
1503
1504 static const u32 sgpr1_init_regs[] =
1505 {
1506         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1507         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1508         mmCOMPUTE_NUM_THREAD_X, 256*5,
1509         mmCOMPUTE_NUM_THREAD_Y, 1,
1510         mmCOMPUTE_NUM_THREAD_Z, 1,
1511         mmCOMPUTE_PGM_RSRC2, 20,
1512         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1513         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1514         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1515         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1516         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1517         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1518         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1519         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1520         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1521         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1522 };
1523
1524 static const u32 sgpr2_init_regs[] =
1525 {
1526         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1527         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1528         mmCOMPUTE_NUM_THREAD_X, 256*5,
1529         mmCOMPUTE_NUM_THREAD_Y, 1,
1530         mmCOMPUTE_NUM_THREAD_Z, 1,
1531         mmCOMPUTE_PGM_RSRC2, 20,
1532         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1533         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1534         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1535         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1536         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1537         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1538         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1539         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1540         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1541         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1542 };
1543
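     /*
      * SEC/DED error counter registers that are read back at the end of the
      * workaround to clear their counts.
      */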
1544 static const u32 sec_ded_counter_registers[] =
1545 {
1546         mmCPC_EDC_ATC_CNT,
1547         mmCPC_EDC_SCRATCH_CNT,
1548         mmCPC_EDC_UCODE_CNT,
1549         mmCPF_EDC_ATC_CNT,
1550         mmCPF_EDC_ROQ_CNT,
1551         mmCPF_EDC_TAG_CNT,
1552         mmCPG_EDC_ATC_CNT,
1553         mmCPG_EDC_DMA_CNT,
1554         mmCPG_EDC_TAG_CNT,
1555         mmDC_EDC_CSINVOC_CNT,
1556         mmDC_EDC_RESTORE_CNT,
1557         mmDC_EDC_STATE_CNT,
1558         mmGDS_EDC_CNT,
1559         mmGDS_EDC_GRBM_CNT,
1560         mmGDS_EDC_OA_DED,
1561         mmSPI_EDC_CNT,
1562         mmSQC_ATC_EDC_GATCL1_CNT,
1563         mmSQC_EDC_CNT,
1564         mmSQ_EDC_DED_CNT,
1565         mmSQ_EDC_INFO,
1566         mmSQ_EDC_SEC_CNT,
1567         mmTCC_EDC_CNT,
1568         mmTCP_ATC_EDC_GATCL1_CNT,
1569         mmTCP_EDC_CNT,
1570         mmTD_EDC_CNT
1571 };
1572
1573 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1574 {
1575         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1576         struct amdgpu_ib ib;
1577         struct fence *f = NULL;
1578         int r, i;
1579         u32 tmp;
1580         unsigned total_size, vgpr_offset, sgpr_offset;
1581         u64 gpu_addr;
1582
1583         /* only supported on CZ */
1584         if (adev->asic_type != CHIP_CARRIZO)
1585                 return 0;
1586
1587         /* bail if the compute ring is not ready */
1588         if (!ring->ready)
1589                 return 0;
1590
1591         tmp = RREG32(mmGB_EDC_MODE);
1592         WREG32(mmGB_EDC_MODE, 0);
1593
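             /*
              * Size the IB: each reg/value pair costs a 3-dword SET_SH_REG
              * packet, plus 4 dwords for COMPUTE_PGM_LO/HI, 5 for
              * DISPATCH_DIRECT and 2 for the EVENT_WRITE flush, 4 bytes per
              * dword; the two shader blobs are appended after the 256-byte
              * aligned command stream.
              */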
1594         total_size =
1595                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1596         total_size +=
1597                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1598         total_size +=
1599                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1600         total_size = ALIGN(total_size, 256);
1601         vgpr_offset = total_size;
1602         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1603         sgpr_offset = total_size;
1604         total_size += sizeof(sgpr_init_compute_shader);
1605
1606         /* allocate an indirect buffer to put the commands in */
1607         memset(&ib, 0, sizeof(ib));
1608         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1609         if (r) {
1610                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1611                 return r;
1612         }
1613
1614         /* load the compute shaders */
1615         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1616                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1617
1618         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1619                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1620
1621         /* init the ib length to 0 */
1622         ib.length_dw = 0;
1623
1624         /* VGPR */
1625         /* write the register state for the compute dispatch */
1626         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1627                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1628                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1629                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1630         }
1631         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1632         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1633         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1634         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1635         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1636         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1637
1638         /* write dispatch packet */
1639         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1640         ib.ptr[ib.length_dw++] = 8; /* x */
1641         ib.ptr[ib.length_dw++] = 1; /* y */
1642         ib.ptr[ib.length_dw++] = 1; /* z */
1643         ib.ptr[ib.length_dw++] =
1644                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1645
1646         /* write CS partial flush packet */
1647         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1648         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1649
1650         /* SGPR1 */
1651         /* write the register state for the compute dispatch */
1652         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1653                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1654                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1655                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1656         }
1657         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1658         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1659         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1660         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1661         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1662         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1663
1664         /* write dispatch packet */
1665         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1666         ib.ptr[ib.length_dw++] = 8; /* x */
1667         ib.ptr[ib.length_dw++] = 1; /* y */
1668         ib.ptr[ib.length_dw++] = 1; /* z */
1669         ib.ptr[ib.length_dw++] =
1670                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1671
1672         /* write CS partial flush packet */
1673         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1674         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1675
1676         /* SGPR2 */
1677         /* write the register state for the compute dispatch */
1678         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1679                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1680                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1681                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1682         }
1683         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1684         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1685         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1686         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1687         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1688         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1689
1690         /* write dispatch packet */
1691         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1692         ib.ptr[ib.length_dw++] = 8; /* x */
1693         ib.ptr[ib.length_dw++] = 1; /* y */
1694         ib.ptr[ib.length_dw++] = 1; /* z */
1695         ib.ptr[ib.length_dw++] =
1696                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1697
1698         /* write CS partial flush packet */
1699         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1700         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1701
1702         /* schedule the IB on the ring */
1703         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1704         if (r) {
1705                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1706                 goto fail;
1707         }
1708
1709         /* wait for the GPU to finish processing the IB */
1710         r = fence_wait(f, false);
1711         if (r) {
1712                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1713                 goto fail;
1714         }
1715
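             /*
              * Restore GB_EDC_MODE (saved at entry and zeroed above) with
              * DED_MODE and PROP_FED set, and rewrite CC_GC_EDC_CONFIG.
              */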
1716         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1717         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1718         WREG32(mmGB_EDC_MODE, tmp);
1719
1720         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1721         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1722         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1723
1724
1725         /* read back registers to clear the counters */
1726         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1727                 RREG32(sec_ded_counter_registers[i]);
1728
1729 fail:
1730         amdgpu_ib_free(adev, &ib, NULL);
1731         fence_put(f);
1732
1733         return r;
1734 }
1735
1736 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1737 {
1738         u32 gb_addr_config;
1739         u32 mc_shared_chmap, mc_arb_ramcfg;
1740         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1741         u32 tmp;
1742         int ret;
1743
1744         switch (adev->asic_type) {
1745         case CHIP_TOPAZ:
1746                 adev->gfx.config.max_shader_engines = 1;
1747                 adev->gfx.config.max_tile_pipes = 2;
1748                 adev->gfx.config.max_cu_per_sh = 6;
1749                 adev->gfx.config.max_sh_per_se = 1;
1750                 adev->gfx.config.max_backends_per_se = 2;
1751                 adev->gfx.config.max_texture_channel_caches = 2;
1752                 adev->gfx.config.max_gprs = 256;
1753                 adev->gfx.config.max_gs_threads = 32;
1754                 adev->gfx.config.max_hw_contexts = 8;
1755
1756                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1757                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1758                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1759                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1760                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1761                 break;
1762         case CHIP_FIJI:
1763                 adev->gfx.config.max_shader_engines = 4;
1764                 adev->gfx.config.max_tile_pipes = 16;
1765                 adev->gfx.config.max_cu_per_sh = 16;
1766                 adev->gfx.config.max_sh_per_se = 1;
1767                 adev->gfx.config.max_backends_per_se = 4;
1768                 adev->gfx.config.max_texture_channel_caches = 16;
1769                 adev->gfx.config.max_gprs = 256;
1770                 adev->gfx.config.max_gs_threads = 32;
1771                 adev->gfx.config.max_hw_contexts = 8;
1772
1773                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1774                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1775                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1776                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1777                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1778                 break;
1779         case CHIP_POLARIS11:
1780                 ret = amdgpu_atombios_get_gfx_info(adev);
1781                 if (ret)
1782                         return ret;
1783                 adev->gfx.config.max_gprs = 256;
1784                 adev->gfx.config.max_gs_threads = 32;
1785                 adev->gfx.config.max_hw_contexts = 8;
1786
1787                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1788                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1789                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1790                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1791                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1792                 break;
1793         case CHIP_POLARIS10:
1794                 ret = amdgpu_atombios_get_gfx_info(adev);
1795                 if (ret)
1796                         return ret;
1797                 adev->gfx.config.max_gprs = 256;
1798                 adev->gfx.config.max_gs_threads = 32;
1799                 adev->gfx.config.max_hw_contexts = 8;
1800
1801                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1802                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1803                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1804                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1805                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1806                 break;
1807         case CHIP_TONGA:
1808                 adev->gfx.config.max_shader_engines = 4;
1809                 adev->gfx.config.max_tile_pipes = 8;
1810                 adev->gfx.config.max_cu_per_sh = 8;
1811                 adev->gfx.config.max_sh_per_se = 1;
1812                 adev->gfx.config.max_backends_per_se = 2;
1813                 adev->gfx.config.max_texture_channel_caches = 8;
1814                 adev->gfx.config.max_gprs = 256;
1815                 adev->gfx.config.max_gs_threads = 32;
1816                 adev->gfx.config.max_hw_contexts = 8;
1817
1818                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1819                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1820                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1821                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1822                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1823                 break;
1824         case CHIP_CARRIZO:
1825                 adev->gfx.config.max_shader_engines = 1;
1826                 adev->gfx.config.max_tile_pipes = 2;
1827                 adev->gfx.config.max_sh_per_se = 1;
1828                 adev->gfx.config.max_backends_per_se = 2;
1829
1830                 switch (adev->pdev->revision) {
1831                 case 0xc4:
1832                 case 0x84:
1833                 case 0xc8:
1834                 case 0xcc:
1835                 case 0xe1:
1836                 case 0xe3:
1837                         /* B10 */
1838                         adev->gfx.config.max_cu_per_sh = 8;
1839                         break;
1840                 case 0xc5:
1841                 case 0x81:
1842                 case 0x85:
1843                 case 0xc9:
1844                 case 0xcd:
1845                 case 0xe2:
1846                 case 0xe4:
1847                         /* B8 */
1848                         adev->gfx.config.max_cu_per_sh = 6;
1849                         break;
1850                 case 0xc6:
1851                 case 0xca:
1852                 case 0xce:
1853                 case 0x88:
1854                         /* B6 */
1855                         adev->gfx.config.max_cu_per_sh = 6;
1856                         break;
1857                 case 0xc7:
1858                 case 0x87:
1859                 case 0xcb:
1860                 case 0xe5:
1861                 case 0x89:
1862                 default:
1863                         /* B4 */
1864                         adev->gfx.config.max_cu_per_sh = 4;
1865                         break;
1866                 }
1867
1868                 adev->gfx.config.max_texture_channel_caches = 2;
1869                 adev->gfx.config.max_gprs = 256;
1870                 adev->gfx.config.max_gs_threads = 32;
1871                 adev->gfx.config.max_hw_contexts = 8;
1872
1873                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1874                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1875                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1876                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1877                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1878                 break;
1879         case CHIP_STONEY:
1880                 adev->gfx.config.max_shader_engines = 1;
1881                 adev->gfx.config.max_tile_pipes = 2;
1882                 adev->gfx.config.max_sh_per_se = 1;
1883                 adev->gfx.config.max_backends_per_se = 1;
1884
1885                 switch (adev->pdev->revision) {
1886                 case 0xc0:
1887                 case 0xc1:
1888                 case 0xc2:
1889                 case 0xc4:
1890                 case 0xc8:
1891                 case 0xc9:
1892                         adev->gfx.config.max_cu_per_sh = 3;
1893                         break;
1894                 case 0xd0:
1895                 case 0xd1:
1896                 case 0xd2:
1897                 default:
1898                         adev->gfx.config.max_cu_per_sh = 2;
1899                         break;
1900                 }
1901
1902                 adev->gfx.config.max_texture_channel_caches = 2;
1903                 adev->gfx.config.max_gprs = 256;
1904                 adev->gfx.config.max_gs_threads = 16;
1905                 adev->gfx.config.max_hw_contexts = 8;
1906
1907                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1908                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1909                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1910                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1911                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1912                 break;
1913         default:
1914                 adev->gfx.config.max_shader_engines = 2;
1915                 adev->gfx.config.max_tile_pipes = 4;
1916                 adev->gfx.config.max_cu_per_sh = 2;
1917                 adev->gfx.config.max_sh_per_se = 1;
1918                 adev->gfx.config.max_backends_per_se = 2;
1919                 adev->gfx.config.max_texture_channel_caches = 4;
1920                 adev->gfx.config.max_gprs = 256;
1921                 adev->gfx.config.max_gs_threads = 32;
1922                 adev->gfx.config.max_hw_contexts = 8;
1923
1924                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1925                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1926                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1927                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1928                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1929                 break;
1930         }
1931
1932         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1933         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1934         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1935
1936         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1937         adev->gfx.config.mem_max_burst_length_bytes = 256;
1938         if (adev->flags & AMD_IS_APU) {
1939                 /* Get memory bank mapping mode. */
1940                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1941                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1942                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1943
1944                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1945                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1946                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1947
1948                 /* Validate settings in case only one DIMM installed. */
1949                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1950                         dimm00_addr_map = 0;
1951                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1952                         dimm01_addr_map = 0;
1953                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1954                         dimm10_addr_map = 0;
1955                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1956                         dimm11_addr_map = 0;
1957
1958                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1959         /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1960                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1961                         adev->gfx.config.mem_row_size_in_kb = 2;
1962                 else
1963                         adev->gfx.config.mem_row_size_in_kb = 1;
1964         } else {
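                     /*
                      * Derive the dGPU DRAM row size from NOOFCOLS:
                      * 4 bytes * 2^(8 + NOOFCOLS) columns, capped at 4 KB.
                      */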
1965                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1966                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1967                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1968                         adev->gfx.config.mem_row_size_in_kb = 4;
1969         }
1970
1971         adev->gfx.config.shader_engine_tile_size = 32;
1972         adev->gfx.config.num_gpus = 1;
1973         adev->gfx.config.multi_gpu_tile_size = 64;
1974
1975         /* fix up row size */
1976         switch (adev->gfx.config.mem_row_size_in_kb) {
1977         case 1:
1978         default:
1979                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1980                 break;
1981         case 2:
1982                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1983                 break;
1984         case 4:
1985                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1986                 break;
1987         }
1988         adev->gfx.config.gb_addr_config = gb_addr_config;
1989
1990         return 0;
1991 }
1992
1993 static int gfx_v8_0_sw_init(void *handle)
1994 {
1995         int i, r;
1996         struct amdgpu_ring *ring;
1997         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1998
1999         /* EOP Event */
2000         r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
2001         if (r)
2002                 return r;
2003
2004         /* Privileged reg */
2005         r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
2006         if (r)
2007                 return r;
2008
2009         /* Privileged inst */
2010         r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2011         if (r)
2012                 return r;
2013
2014         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2015
2016         gfx_v8_0_scratch_init(adev);
2017
2018         r = gfx_v8_0_init_microcode(adev);
2019         if (r) {
2020                 DRM_ERROR("Failed to load gfx firmware!\n");
2021                 return r;
2022         }
2023
2024         r = gfx_v8_0_rlc_init(adev);
2025         if (r) {
2026                 DRM_ERROR("Failed to init rlc BOs!\n");
2027                 return r;
2028         }
2029
2030         r = gfx_v8_0_mec_init(adev);
2031         if (r) {
2032                 DRM_ERROR("Failed to init MEC BOs!\n");
2033                 return r;
2034         }
2035
2036         /* set up the gfx ring */
2037         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2038                 ring = &adev->gfx.gfx_ring[i];
2039                 ring->ring_obj = NULL;
2040                 sprintf(ring->name, "gfx");
2041                 /* no gfx doorbells on iceland */
2042                 if (adev->asic_type != CHIP_TOPAZ) {
2043                         ring->use_doorbell = true;
2044                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2045                 }
2046
2047                 r = amdgpu_ring_init(adev, ring, 1024,
2048                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2049                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
2050                                      AMDGPU_RING_TYPE_GFX);
2051                 if (r)
2052                         return r;
2053         }
2054
2055         /* set up the compute queues */
2056         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2057                 unsigned irq_type;
2058
2059                 /* max 32 queues per MEC */
2060                 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2061                         DRM_ERROR("Too many (%d) compute rings!\n", i);
2062                         break;
2063                 }
2064                 ring = &adev->gfx.compute_ring[i];
2065                 ring->ring_obj = NULL;
2066                 ring->use_doorbell = true;
2067                 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2068                 ring->me = 1; /* first MEC */
2069                 ring->pipe = i / 8;
2070                 ring->queue = i % 8;
2071                 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2072                 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2073                 /* type-2 packets are deprecated on MEC, use type-3 instead */
2074                 r = amdgpu_ring_init(adev, ring, 1024,
2075                                      PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2076                                      &adev->gfx.eop_irq, irq_type,
2077                                      AMDGPU_RING_TYPE_COMPUTE);
2078                 if (r)
2079                         return r;
2080         }
2081
2082         /* reserve GDS, GWS and OA resource for gfx */
2083         r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2084                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2085                                     &adev->gds.gds_gfx_bo, NULL, NULL);
2086         if (r)
2087                 return r;
2088
2089         r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2090                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2091                                     &adev->gds.gws_gfx_bo, NULL, NULL);
2092         if (r)
2093                 return r;
2094
2095         r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2096                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2097                                     &adev->gds.oa_gfx_bo, NULL, NULL);
2098         if (r)
2099                 return r;
2100
2101         adev->gfx.ce_ram_size = 0x8000;
2102
2103         r = gfx_v8_0_gpu_early_init(adev);
2104         if (r)
2105                 return r;
2106
2107         return 0;
2108 }
2109
2110 static int gfx_v8_0_sw_fini(void *handle)
2111 {
2112         int i;
2113         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2114
2115         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2116         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2117         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2118
2119         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2120                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2121         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2122                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2123
2124         gfx_v8_0_mec_fini(adev);
2125         gfx_v8_0_rlc_fini(adev);
2126         gfx_v8_0_free_microcode(adev);
2127
2128         return 0;
2129 }
2130
2131 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2132 {
2133         uint32_t *modearray, *mod2array;
2134         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2135         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2136         u32 reg_offset;
2137
2138         modearray = adev->gfx.config.tile_mode_array;
2139         mod2array = adev->gfx.config.macrotile_mode_array;
2140
2141         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2142                 modearray[reg_offset] = 0;
2143
2144         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2145                 mod2array[reg_offset] = 0;
2146
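             /*
              * Fill the GB_TILE_MODE and GB_MACROTILE_MODE shadow arrays
              * with per-ASIC values and write them to the registers;
              * entries not programmed for a given ASIC stay zero.
              */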
2147         switch (adev->asic_type) {
2148         case CHIP_TOPAZ:
2149                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2150                                 PIPE_CONFIG(ADDR_SURF_P2) |
2151                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2152                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2153                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2154                                 PIPE_CONFIG(ADDR_SURF_P2) |
2155                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2156                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2157                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2158                                 PIPE_CONFIG(ADDR_SURF_P2) |
2159                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2160                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2161                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2162                                 PIPE_CONFIG(ADDR_SURF_P2) |
2163                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2164                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2165                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2166                                 PIPE_CONFIG(ADDR_SURF_P2) |
2167                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2168                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2169                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2170                                 PIPE_CONFIG(ADDR_SURF_P2) |
2171                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2172                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2173                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2174                                 PIPE_CONFIG(ADDR_SURF_P2) |
2175                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2176                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2177                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2178                                 PIPE_CONFIG(ADDR_SURF_P2));
2179                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2180                                 PIPE_CONFIG(ADDR_SURF_P2) |
2181                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2182                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2183                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2184                                  PIPE_CONFIG(ADDR_SURF_P2) |
2185                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2186                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2187                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2188                                  PIPE_CONFIG(ADDR_SURF_P2) |
2189                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2190                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2191                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2192                                  PIPE_CONFIG(ADDR_SURF_P2) |
2193                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2194                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2195                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2196                                  PIPE_CONFIG(ADDR_SURF_P2) |
2197                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2198                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2199                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2200                                  PIPE_CONFIG(ADDR_SURF_P2) |
2201                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2202                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2203                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2204                                  PIPE_CONFIG(ADDR_SURF_P2) |
2205                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2206                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2207                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2208                                  PIPE_CONFIG(ADDR_SURF_P2) |
2209                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2210                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2211                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2212                                  PIPE_CONFIG(ADDR_SURF_P2) |
2213                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2214                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2215                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2216                                  PIPE_CONFIG(ADDR_SURF_P2) |
2217                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2218                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2219                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2220                                  PIPE_CONFIG(ADDR_SURF_P2) |
2221                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2222                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2223                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2224                                  PIPE_CONFIG(ADDR_SURF_P2) |
2225                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2226                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2227                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2228                                  PIPE_CONFIG(ADDR_SURF_P2) |
2229                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2230                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2231                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2232                                  PIPE_CONFIG(ADDR_SURF_P2) |
2233                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2234                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2235                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2236                                  PIPE_CONFIG(ADDR_SURF_P2) |
2237                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2238                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2239                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2240                                  PIPE_CONFIG(ADDR_SURF_P2) |
2241                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2242                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2243                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2244                                  PIPE_CONFIG(ADDR_SURF_P2) |
2245                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2246                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2247                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2248                                  PIPE_CONFIG(ADDR_SURF_P2) |
2249                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2250                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2251
2252                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2253                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2254                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2255                                 NUM_BANKS(ADDR_SURF_8_BANK));
2256                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2257                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2258                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2259                                 NUM_BANKS(ADDR_SURF_8_BANK));
2260                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2261                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2262                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2263                                 NUM_BANKS(ADDR_SURF_8_BANK));
2264                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2265                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2266                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2267                                 NUM_BANKS(ADDR_SURF_8_BANK));
2268                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2269                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2270                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2271                                 NUM_BANKS(ADDR_SURF_8_BANK));
2272                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2273                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2274                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2275                                 NUM_BANKS(ADDR_SURF_8_BANK));
2276                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2277                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2278                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2279                                 NUM_BANKS(ADDR_SURF_8_BANK));
2280                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2281                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2282                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2283                                 NUM_BANKS(ADDR_SURF_16_BANK));
2284                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2285                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2286                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2287                                 NUM_BANKS(ADDR_SURF_16_BANK));
2288                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2289                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2290                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2291                                  NUM_BANKS(ADDR_SURF_16_BANK));
2292                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2293                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2294                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2295                                  NUM_BANKS(ADDR_SURF_16_BANK));
2296                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2297                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2298                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2299                                  NUM_BANKS(ADDR_SURF_16_BANK));
2300                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2301                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2302                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2303                                  NUM_BANKS(ADDR_SURF_16_BANK));
2304                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2305                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2306                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2307                                  NUM_BANKS(ADDR_SURF_8_BANK));
2308
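                     /*
                      * Program the golden values: each tile mode goes to
                      * mmGB_TILE_MODE0 + offset, each macrotile mode to
                      * mmGB_MACROTILE_MODE0 + offset.  Tile-mode offsets 7, 12,
                      * 17 and 23 and macrotile offset 7 are skipped and left
                      * untouched.
                      */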
2309                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2310                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2311                             reg_offset != 23)
2312                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2313
2314                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2315                         if (reg_offset != 7)
2316                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2317
2318                 break;
2319         case CHIP_FIJI:
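                     /*
                      * Fiji golden tiling setup: tile modes 0-30 are populated,
                      * all using the 16-pipe ADDR_SURF_P16_32x32_16x16 pipe
                      * configuration except a few PRT modes that fall back to
                      * ADDR_SURF_P4_16x16.
                      */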
2320                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2321                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2322                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2323                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2324                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2325                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2326                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2327                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2328                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2329                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2331                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2332                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2333                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2335                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2336                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2338                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2339                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2340                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2341                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2343                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2344                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2345                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2347                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2348                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2349                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2350                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2351                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2352                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2353                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2354                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2355                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2356                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2357                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2358                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2359                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2361                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2362                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2363                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2365                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2366                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2367                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2368                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2369                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2370                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2371                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2373                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2374                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2377                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2378                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2379                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2381                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2382                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2383                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2385                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2386                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2387                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2388                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2389                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2390                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2391                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2393                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2394                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2395                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2397                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2398                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2399                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2401                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2402                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2403                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2405                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2406                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2407                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2409                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2410                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2411                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2412                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2413                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2414                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2415                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2416                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2417                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2418                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2419                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2421                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2422                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2423                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2425                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2426                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2427                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2429                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2430                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2433                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2435                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2436                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2437                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2438                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2439                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2440                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2441                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2442
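                     /* Fiji macrotile (bank) parameters; index 7 is again unused. */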
2443                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2444                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2445                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2446                                 NUM_BANKS(ADDR_SURF_8_BANK));
2447                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2448                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2449                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2450                                 NUM_BANKS(ADDR_SURF_8_BANK));
2451                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2452                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2453                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2454                                 NUM_BANKS(ADDR_SURF_8_BANK));
2455                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2456                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2457                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2458                                 NUM_BANKS(ADDR_SURF_8_BANK));
2459                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2460                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2461                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2462                                 NUM_BANKS(ADDR_SURF_8_BANK));
2463                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2464                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2465                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2466                                 NUM_BANKS(ADDR_SURF_8_BANK));
2467                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2469                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2470                                 NUM_BANKS(ADDR_SURF_8_BANK));
2471                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2472                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2473                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2474                                 NUM_BANKS(ADDR_SURF_8_BANK));
2475                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2476                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2477                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2478                                 NUM_BANKS(ADDR_SURF_8_BANK));
2479                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2481                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2482                                  NUM_BANKS(ADDR_SURF_8_BANK));
2483                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2484                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2485                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2486                                  NUM_BANKS(ADDR_SURF_8_BANK));
2487                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2489                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2490                                  NUM_BANKS(ADDR_SURF_8_BANK));
2491                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2492                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2493                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2494                                  NUM_BANKS(ADDR_SURF_8_BANK));
2495                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2496                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2497                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2498                                  NUM_BANKS(ADDR_SURF_4_BANK));
2499
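                     /*
                      * Unlike the P2-based case above, every tile-mode register
                      * is written here; only macrotile offset 7 is skipped.
                      */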
2500                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2501                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2502
2503                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2504                         if (reg_offset != 7)
2505                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2506
2507                 break;
2508         case CHIP_TONGA:
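                     /*
                      * Tonga uses the same table layout as Fiji but with the
                      * 8-pipe ADDR_SURF_P8_32x32_16x16 pipe configuration.
                      */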
2509                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2510                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2511                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2512                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2513                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2516                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2517                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2520                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2521                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2523                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2524                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2525                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2527                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2528                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2529                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2530                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2532                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2533                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2534                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2536                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2537                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2538                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2539                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2540                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2541                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2542                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2543                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2544                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2545                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2546                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2547                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2548                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2549                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2550                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2551                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2552                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2553                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2554                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2555                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2556                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2557                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2558                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2559                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2560                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2561                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2562                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2563                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2564                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2566                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2567                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2568                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2570                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2571                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2572                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2573                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2574                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2575                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2576                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2577                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2578                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2579                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2580                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2581                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2582                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2583                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2584                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2585                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2586                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2587                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2588                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2589                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2590                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2591                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2592                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2593                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2594                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2595                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2596                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2597                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2598                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2599                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2600                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2601                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2602                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2603                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2604                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2605                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2606                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2607                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2608                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2609                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2610                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2611                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2612                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2613                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2614                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2615                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2616                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2617                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2618                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2619                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2620                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2621                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2622                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2623                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2624                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2625                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2626                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2627                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2628                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2629                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2630                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2631
2632                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2634                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2635                                 NUM_BANKS(ADDR_SURF_16_BANK));
2636                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2638                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2639                                 NUM_BANKS(ADDR_SURF_16_BANK));
2640                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2642                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2643                                 NUM_BANKS(ADDR_SURF_16_BANK));
2644                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2646                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2647                                 NUM_BANKS(ADDR_SURF_16_BANK));
2648                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2649                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2650                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2651                                 NUM_BANKS(ADDR_SURF_16_BANK));
2652                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2653                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2654                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2655                                 NUM_BANKS(ADDR_SURF_16_BANK));
2656                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2657                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2658                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2659                                 NUM_BANKS(ADDR_SURF_16_BANK));
2660                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2661                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2662                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2663                                 NUM_BANKS(ADDR_SURF_16_BANK));
2664                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2665                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2666                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2667                                 NUM_BANKS(ADDR_SURF_16_BANK));
2668                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2669                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2670                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2671                                  NUM_BANKS(ADDR_SURF_16_BANK));
2672                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2673                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2674                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2675                                  NUM_BANKS(ADDR_SURF_16_BANK));
2676                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2677                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2678                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2679                                  NUM_BANKS(ADDR_SURF_8_BANK));
2680                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2681                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2682                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2683                                  NUM_BANKS(ADDR_SURF_4_BANK));
2684                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2685                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2686                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2687                                  NUM_BANKS(ADDR_SURF_4_BANK));
2688
2689                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2690                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2691
2692                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2693                         if (reg_offset != 7)
2694                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2695
2696                 break;
2697         case CHIP_POLARIS11:
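                     /*
                      * Polaris11: every mode, including the PRT ones, uses the
                      * 4-pipe ADDR_SURF_P4_16x16 pipe configuration.
                      */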
2698                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2699                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2700                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2701                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2702                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2703                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2705                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2706                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2707                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2708                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2709                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2710                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2711                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2712                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2713                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2714                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2715                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2717                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2718                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2719                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2721                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2722                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2723                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2725                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2726                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2728                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2729                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2730                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2731                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2732                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2733                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2735                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2736                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2737                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2738                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2739                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2740                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2741                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2742                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2743                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2744                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2745                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2746                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2747                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2748                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2749                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2750                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2751                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2752                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2754                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2755                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2756                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2757                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2759                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2760                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2761                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2762                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2763                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2764                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2765                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2766                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2767                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2768                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2769                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2770                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2771                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2772                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2773                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2774                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2775                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2776                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2777                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2778                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2779                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2780                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2781                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2782                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2783                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2784                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2785                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2786                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2787                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2788                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2789                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2790                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2791                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2792                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2793                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2794                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2795                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2796                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2797                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2798                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2799                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2800                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2801                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2802                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2803                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2804                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2805                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2806                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2807                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2808                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2810                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2811                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2812                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2813                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2814                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2815                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2816                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2817                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2818                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2819                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2820
2821                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2823                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2824                                 NUM_BANKS(ADDR_SURF_16_BANK));
2825
2826                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2827                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2828                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2829                                 NUM_BANKS(ADDR_SURF_16_BANK));
2830
2831                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2832                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2833                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2834                                 NUM_BANKS(ADDR_SURF_16_BANK));
2835
2836                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2837                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2838                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839                                 NUM_BANKS(ADDR_SURF_16_BANK));
2840
2841                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2843                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2844                                 NUM_BANKS(ADDR_SURF_16_BANK));
2845
2846                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2847                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2848                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2849                                 NUM_BANKS(ADDR_SURF_16_BANK));
2850
2851                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2852                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2853                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2854                                 NUM_BANKS(ADDR_SURF_16_BANK));
2855
2856                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2857                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2858                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2859                                 NUM_BANKS(ADDR_SURF_16_BANK));
2860
2861                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2862                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2863                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2864                                 NUM_BANKS(ADDR_SURF_16_BANK));
2865
2866                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2867                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2868                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2869                                 NUM_BANKS(ADDR_SURF_16_BANK));
2870
2871                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2872                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2873                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2874                                 NUM_BANKS(ADDR_SURF_16_BANK));
2875
2876                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2877                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2878                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2879                                 NUM_BANKS(ADDR_SURF_16_BANK));
2880
2881                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2882                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2883                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2884                                 NUM_BANKS(ADDR_SURF_8_BANK));
2885
2886                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2887                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2888                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2889                                 NUM_BANKS(ADDR_SURF_4_BANK));
2890
2891                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2892                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2893
2894                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2895                         if (reg_offset != 7)
2896                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2897
2898                 break;
2899         case CHIP_POLARIS10:
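                     /*
                      * Polaris10 mirrors the Tonga setup with the 8-pipe
                      * ADDR_SURF_P8_32x32_16x16 pipe configuration.
                      */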
2900                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2902                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2903                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2907                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2908                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2911                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2912                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2915                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2916                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2919                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2920                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2921                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2923                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2924                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2925                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2926                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2927                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2928                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2929                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2930                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2931                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2932                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2933                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2934                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2935                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2936                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2937                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2939                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2940                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2941                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2942                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2943                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2944                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2945                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2946                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2947                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2948                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2949                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2950                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2951                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2952                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2953                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2954                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2955                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2956                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2959                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2960                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2961                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2962                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2963                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2964                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2965                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2966                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2967                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2968                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2969                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2970                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2971                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2972                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2973                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2974                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2975                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2976                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2977                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2978                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2979                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2980                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2981                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2982                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2983                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2984                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2985                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2986                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2987                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2988                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2989                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2990                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2991                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2992                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2993                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2994                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2995                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2996                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2997                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2998                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2999                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3000                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3001                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3002                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3003                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3004                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3005                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3006                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3007                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3008                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3009                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3010                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3011                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3012                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3013                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3014                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3015                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3016                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3017                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3018                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3019                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3020                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3021                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3022
3023                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3024                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3025                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3026                                 NUM_BANKS(ADDR_SURF_16_BANK));
3027
3028                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3030                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031                                 NUM_BANKS(ADDR_SURF_16_BANK));
3032
3033                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3035                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3036                                 NUM_BANKS(ADDR_SURF_16_BANK));
3037
3038                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3039                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3040                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3041                                 NUM_BANKS(ADDR_SURF_16_BANK));
3042
3043                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3044                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3045                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3046                                 NUM_BANKS(ADDR_SURF_16_BANK));
3047
3048                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3049                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3050                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3051                                 NUM_BANKS(ADDR_SURF_16_BANK));
3052
3053                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3054                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3055                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3056                                 NUM_BANKS(ADDR_SURF_16_BANK));
3057
3058                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3059                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3060                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3061                                 NUM_BANKS(ADDR_SURF_16_BANK));
3062
3063                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3064                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3065                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3066                                 NUM_BANKS(ADDR_SURF_16_BANK));
3067
3068                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3069                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3070                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3071                                 NUM_BANKS(ADDR_SURF_16_BANK));
3072
3073                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3074                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3075                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3076                                 NUM_BANKS(ADDR_SURF_16_BANK));
3077
3078                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3079                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3080                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3081                                 NUM_BANKS(ADDR_SURF_8_BANK));
3082
3083                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3084                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3085                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3086                                 NUM_BANKS(ADDR_SURF_4_BANK));
3087
3088                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3089                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3090                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3091                                 NUM_BANKS(ADDR_SURF_4_BANK));
3092
3093                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3094                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3095
3096                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3097                         if (reg_offset != 7)
3098                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3099
3100                 break;
3101         case CHIP_STONEY:
3102                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3103                                 PIPE_CONFIG(ADDR_SURF_P2) |
3104                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3105                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3106                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3107                                 PIPE_CONFIG(ADDR_SURF_P2) |
3108                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3109                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3110                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3111                                 PIPE_CONFIG(ADDR_SURF_P2) |
3112                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3113                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3114                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3115                                 PIPE_CONFIG(ADDR_SURF_P2) |
3116                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3117                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3118                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3119                                 PIPE_CONFIG(ADDR_SURF_P2) |
3120                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3121                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3122                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3123                                 PIPE_CONFIG(ADDR_SURF_P2) |
3124                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3125                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3126                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3127                                 PIPE_CONFIG(ADDR_SURF_P2) |
3128                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3129                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3130                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3131                                 PIPE_CONFIG(ADDR_SURF_P2));
3132                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3133                                 PIPE_CONFIG(ADDR_SURF_P2) |
3134                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3135                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3136                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3137                                  PIPE_CONFIG(ADDR_SURF_P2) |
3138                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3139                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3140                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3141                                  PIPE_CONFIG(ADDR_SURF_P2) |
3142                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3143                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3144                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3145                                  PIPE_CONFIG(ADDR_SURF_P2) |
3146                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3147                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3148                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3149                                  PIPE_CONFIG(ADDR_SURF_P2) |
3150                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3151                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3152                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3153                                  PIPE_CONFIG(ADDR_SURF_P2) |
3154                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3155                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3156                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3157                                  PIPE_CONFIG(ADDR_SURF_P2) |
3158                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3159                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3160                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3161                                  PIPE_CONFIG(ADDR_SURF_P2) |
3162                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3163                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3164                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3165                                  PIPE_CONFIG(ADDR_SURF_P2) |
3166                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3167                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3168                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3169                                  PIPE_CONFIG(ADDR_SURF_P2) |
3170                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3171                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3172                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3173                                  PIPE_CONFIG(ADDR_SURF_P2) |
3174                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3175                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3176                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3177                                  PIPE_CONFIG(ADDR_SURF_P2) |
3178                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3179                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3180                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3181                                  PIPE_CONFIG(ADDR_SURF_P2) |
3182                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3183                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3184                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3185                                  PIPE_CONFIG(ADDR_SURF_P2) |
3186                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3187                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3188                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3189                                  PIPE_CONFIG(ADDR_SURF_P2) |
3190                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3191                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3192                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3193                                  PIPE_CONFIG(ADDR_SURF_P2) |
3194                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3195                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3196                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3197                                  PIPE_CONFIG(ADDR_SURF_P2) |
3198                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3199                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3200                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3201                                  PIPE_CONFIG(ADDR_SURF_P2) |
3202                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3203                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3204
3205                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3206                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3207                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3208                                 NUM_BANKS(ADDR_SURF_8_BANK));
3209                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3210                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3211                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3212                                 NUM_BANKS(ADDR_SURF_8_BANK));
3213                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3214                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3215                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3216                                 NUM_BANKS(ADDR_SURF_8_BANK));
3217                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3218                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3219                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3220                                 NUM_BANKS(ADDR_SURF_8_BANK));
3221                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3222                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3223                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3224                                 NUM_BANKS(ADDR_SURF_8_BANK));
3225                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3226                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3227                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3228                                 NUM_BANKS(ADDR_SURF_8_BANK));
3229                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3230                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3231                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3232                                 NUM_BANKS(ADDR_SURF_8_BANK));
3233                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3234                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3235                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3236                                 NUM_BANKS(ADDR_SURF_16_BANK));
3237                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3238                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3239                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3240                                 NUM_BANKS(ADDR_SURF_16_BANK));
3241                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3242                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3243                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3244                                  NUM_BANKS(ADDR_SURF_16_BANK));
3245                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3246                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3247                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3248                                  NUM_BANKS(ADDR_SURF_16_BANK));
3249                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3250                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3251                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3252                                  NUM_BANKS(ADDR_SURF_16_BANK));
3253                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3254                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3255                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3256                                  NUM_BANKS(ADDR_SURF_16_BANK));
3257                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3258                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3259                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3260                                  NUM_BANKS(ADDR_SURF_8_BANK));
3261
3262                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3263                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3264                             reg_offset != 23)
3265                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3266
3267                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3268                         if (reg_offset != 7)
3269                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3270
3271                 break;
3272         default:
3273                 dev_warn(adev->dev,
3274                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3275                          adev->asic_type);
3276
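             /* fall through */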
3277         case CHIP_CARRIZO:
3278                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3279                                 PIPE_CONFIG(ADDR_SURF_P2) |
3280                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3281                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3282                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3283                                 PIPE_CONFIG(ADDR_SURF_P2) |
3284                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3285                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3286                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3287                                 PIPE_CONFIG(ADDR_SURF_P2) |
3288                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3289                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3290                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3291                                 PIPE_CONFIG(ADDR_SURF_P2) |
3292                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3293                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3294                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3295                                 PIPE_CONFIG(ADDR_SURF_P2) |
3296                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3297                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3298                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3299                                 PIPE_CONFIG(ADDR_SURF_P2) |
3300                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3301                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3302                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3303                                 PIPE_CONFIG(ADDR_SURF_P2) |
3304                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3305                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3306                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3307                                 PIPE_CONFIG(ADDR_SURF_P2));
3308                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3309                                 PIPE_CONFIG(ADDR_SURF_P2) |
3310                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3311                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3312                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3313                                  PIPE_CONFIG(ADDR_SURF_P2) |
3314                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3315                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3316                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3317                                  PIPE_CONFIG(ADDR_SURF_P2) |
3318                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3319                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3320                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3321                                  PIPE_CONFIG(ADDR_SURF_P2) |
3322                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3323                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3324                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3325                                  PIPE_CONFIG(ADDR_SURF_P2) |
3326                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3327                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3328                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3329                                  PIPE_CONFIG(ADDR_SURF_P2) |
3330                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3331                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3332                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3333                                  PIPE_CONFIG(ADDR_SURF_P2) |
3334                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3335                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3336                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3337                                  PIPE_CONFIG(ADDR_SURF_P2) |
3338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3340                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3341                                  PIPE_CONFIG(ADDR_SURF_P2) |
3342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3344                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3345                                  PIPE_CONFIG(ADDR_SURF_P2) |
3346                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3347                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3348                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3349                                  PIPE_CONFIG(ADDR_SURF_P2) |
3350                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3351                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3352                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3353                                  PIPE_CONFIG(ADDR_SURF_P2) |
3354                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3355                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3356                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3357                                  PIPE_CONFIG(ADDR_SURF_P2) |
3358                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3359                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3360                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3361                                  PIPE_CONFIG(ADDR_SURF_P2) |
3362                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3363                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3364                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3365                                  PIPE_CONFIG(ADDR_SURF_P2) |
3366                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3367                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3368                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3369                                  PIPE_CONFIG(ADDR_SURF_P2) |
3370                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3371                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3372                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3373                                  PIPE_CONFIG(ADDR_SURF_P2) |
3374                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3375                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3376                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3377                                  PIPE_CONFIG(ADDR_SURF_P2) |
3378                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3379                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3380
3381                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3382                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3383                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3384                                 NUM_BANKS(ADDR_SURF_8_BANK));
3385                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3386                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3387                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3388                                 NUM_BANKS(ADDR_SURF_8_BANK));
3389                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3390                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3391                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3392                                 NUM_BANKS(ADDR_SURF_8_BANK));
3393                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3394                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3395                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3396                                 NUM_BANKS(ADDR_SURF_8_BANK));
3397                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3398                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3399                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3400                                 NUM_BANKS(ADDR_SURF_8_BANK));
3401                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3402                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3403                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3404                                 NUM_BANKS(ADDR_SURF_8_BANK));
3405                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3406                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3407                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3408                                 NUM_BANKS(ADDR_SURF_8_BANK));
3409                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3410                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3411                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3412                                 NUM_BANKS(ADDR_SURF_16_BANK));
3413                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3414                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3415                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3416                                 NUM_BANKS(ADDR_SURF_16_BANK));
3417                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3418                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3419                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3420                                  NUM_BANKS(ADDR_SURF_16_BANK));
3421                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3422                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3423                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3424                                  NUM_BANKS(ADDR_SURF_16_BANK));
3425                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3426                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3427                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3428                                  NUM_BANKS(ADDR_SURF_16_BANK));
3429                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3430                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3431                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3432                                  NUM_BANKS(ADDR_SURF_16_BANK));
3433                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3434                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3435                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3436                                  NUM_BANKS(ADDR_SURF_8_BANK));
3437
3438                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3439                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3440                             reg_offset != 23)
3441                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3442
3443                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3444                         if (reg_offset != 7)
3445                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3446
3447                 break;
3448         }
3449 }
3450
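     /*
      * Select which SE/SH/instance subsequent GRBM-indexed register accesses
      * target; passing 0xffffffff for a field requests broadcast writes to
      * all units of that type.
      */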
3451 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3452                                   u32 se_num, u32 sh_num, u32 instance)
3453 {
3454         u32 data;
3455
3456         if (instance == 0xffffffff)
3457                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3458         else
3459                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3460
3461         if (se_num == 0xffffffff)
3462                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3463         else
3464                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3465
3466         if (sh_num == 0xffffffff)
3467                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3468         else
3469                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3470
3471         WREG32(mmGRBM_GFX_INDEX, data);
3472 }
3473
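     /* Return a mask with the low bit_width bits set. */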
3474 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3475 {
3476         return (u32)((1ULL << bit_width) - 1);
3477 }
3478
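     /*
      * Return the bitmap of render backends that are not disabled in either
      * CC_RB_BACKEND_DISABLE or GC_USER_RB_BACKEND_DISABLE for the currently
      * selected SE/SH.
      */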
3479 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3480 {
3481         u32 data, mask;
3482
3483         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3484                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3485
3486         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3487
3488         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3489                                        adev->gfx.config.max_sh_per_se);
3490
3491         return (~data) & mask;
3492 }
3493
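     /* Fill in the per-ASIC default PA_SC_RASTER_CONFIG/_1 field values. */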
3494 static void
3495 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3496 {
3497         switch (adev->asic_type) {
3498         case CHIP_FIJI:
3499                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3500                           RB_XSEL2(1) | PKR_MAP(2) |
3501                           PKR_XSEL(1) | PKR_YSEL(1) |
3502                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3503                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3504                            SE_PAIR_YSEL(2);
3505                 break;
3506         case CHIP_TONGA:
3507         case CHIP_POLARIS10:
3508                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3509                           SE_XSEL(1) | SE_YSEL(1);
3510                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3511                            SE_PAIR_YSEL(2);
3512                 break;
3513         case CHIP_TOPAZ:
3514         case CHIP_CARRIZO:
3515                 *rconf |= RB_MAP_PKR0(2);
3516                 *rconf1 |= 0x0;
3517                 break;
3518         case CHIP_POLARIS11:
3519                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3520                           SE_XSEL(1) | SE_YSEL(1);
3521                 *rconf1 |= 0x0;
3522                 break;
3523         case CHIP_STONEY:
3524                 *rconf |= 0x0;
3525                 *rconf1 |= 0x0;
3526                 break;
3527         default:
3528                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3529                 break;
3530         }
3531 }
3532
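     /*
      * When some render backends are harvested, rewrite the raster config
      * per shader engine so that the SE/PKR/RB mapping fields only refer to
      * backends that are still present in rb_mask.
      */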
3533 static void
3534 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3535                                         u32 raster_config, u32 raster_config_1,
3536                                         unsigned rb_mask, unsigned num_rb)
3537 {
3538         unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3539         unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3540         unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3541         unsigned rb_per_se = num_rb / num_se;
3542         unsigned se_mask[4];
3543         unsigned se;
3544
3545         se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3546         se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3547         se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3548         se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3549
3550         WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3551         WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3552         WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3553
3554         if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3555                              (!se_mask[2] && !se_mask[3]))) {
3556                 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3557
3558                 if (!se_mask[0] && !se_mask[1]) {
3559                         raster_config_1 |=
3560                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3561                 } else {
3562                         raster_config_1 |=
3563                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3564                 }
3565         }
3566
3567         for (se = 0; se < num_se; se++) {
3568                 unsigned raster_config_se = raster_config;
3569                 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3570                 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3571                 int idx = (se / 2) * 2;
3572
3573                 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3574                         raster_config_se &= ~SE_MAP_MASK;
3575
3576                         if (!se_mask[idx]) {
3577                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3578                         } else {
3579                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3580                         }
3581                 }
3582
3583                 pkr0_mask &= rb_mask;
3584                 pkr1_mask &= rb_mask;
3585                 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3586                         raster_config_se &= ~PKR_MAP_MASK;
3587
3588                         if (!pkr0_mask) {
3589                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3590                         } else {
3591                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3592                         }
3593                 }
3594
3595                 if (rb_per_se >= 2) {
3596                         unsigned rb0_mask = 1 << (se * rb_per_se);
3597                         unsigned rb1_mask = rb0_mask << 1;
3598
3599                         rb0_mask &= rb_mask;
3600                         rb1_mask &= rb_mask;
3601                         if (!rb0_mask || !rb1_mask) {
3602                                 raster_config_se &= ~RB_MAP_PKR0_MASK;
3603
3604                                 if (!rb0_mask) {
3605                                         raster_config_se |=
3606                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3607                                 } else {
3608                                         raster_config_se |=
3609                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3610                                 }
3611                         }
3612
3613                         if (rb_per_se > 2) {
3614                                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3615                                 rb1_mask = rb0_mask << 1;
3616                                 rb0_mask &= rb_mask;
3617                                 rb1_mask &= rb_mask;
3618                                 if (!rb0_mask || !rb1_mask) {
3619                                         raster_config_se &= ~RB_MAP_PKR1_MASK;
3620
3621                                         if (!rb0_mask) {
3622                                                 raster_config_se |=
3623                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3624                                         } else {
3625                                                 raster_config_se |=
3626                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3627                                         }
3628                                 }
3629                         }
3630                 }
3631
3632                 /* GRBM_GFX_INDEX has a different offset on VI */
3633                 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3634                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3635                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3636         }
3637
3638         /* GRBM_GFX_INDEX has a different offset on VI */
3639         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3640 }
3641
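     /*
      * Read back the active render backend bitmap for every SE/SH and
      * program the raster configuration, using the harvested path when
      * not all RBs are enabled.
      */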
3642 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3643 {
3644         int i, j;
3645         u32 data;
3646         u32 raster_config = 0, raster_config_1 = 0;
3647         u32 active_rbs = 0;
3648         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3649                                         adev->gfx.config.max_sh_per_se;
3650         unsigned num_rb_pipes;
3651
3652         mutex_lock(&adev->grbm_idx_mutex);
3653         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3654                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3655                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3656                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3657                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3658                                                rb_bitmap_width_per_sh);
3659                 }
3660         }
3661         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3662
3663         adev->gfx.config.backend_enable_mask = active_rbs;
3664         adev->gfx.config.num_rbs = hweight32(active_rbs);
3665
3666         num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3667                              adev->gfx.config.max_shader_engines, 16);
3668
3669         gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3670
3671         if (!adev->gfx.config.backend_enable_mask ||
3672                         adev->gfx.config.num_rbs >= num_rb_pipes) {
3673                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3674                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3675         } else {
3676                 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3677                                                         adev->gfx.config.backend_enable_mask,
3678                                                         num_rb_pipes);
3679         }
3680
3681         mutex_unlock(&adev->grbm_idx_mutex);
3682 }
3683
3684 /**
3685  * gfx_v8_0_init_compute_vmid - init compute vmid SH_MEM registers
3686  *
3687  * @adev: amdgpu_device pointer
3688  *
3689  * Initialize the SH_MEM config and aperture base registers for the
3690  * compute VMIDs (FIRST_COMPUTE_VMID up to LAST_COMPUTE_VMID - 1).
3691  */
3692 #define DEFAULT_SH_MEM_BASES    (0x6000)
3693 #define FIRST_COMPUTE_VMID      (8)
3694 #define LAST_COMPUTE_VMID       (16)
3695 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3696 {
3697         int i;
3698         uint32_t sh_mem_config;
3699         uint32_t sh_mem_bases;
3700
3701         /*
3702          * Configure apertures:
3703          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3704          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3705          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3706          */
3707         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3708
3709         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3710                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3711                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3712                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3713                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3714                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3715
3716         mutex_lock(&adev->srbm_mutex);
3717         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3718                 vi_srbm_select(adev, 0, 0, 0, i);
3719                 /* CP and shaders */
3720                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3721                 WREG32(mmSH_MEM_APE1_BASE, 1);
3722                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3723                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3724         }
3725         vi_srbm_select(adev, 0, 0, 0, 0);
3726         mutex_unlock(&adev->srbm_mutex);
3727 }
3728
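     /*
      * One-time GFX setup: address config, tiling tables, render backends,
      * per-VMID SH_MEM apertures and the SC primitive FIFO sizes.
      */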
3729 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3730 {
3731         u32 tmp;
3732         int i;
3733
3734         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3735         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3736         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3737         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3738
3739         gfx_v8_0_tiling_mode_table_init(adev);
3740         gfx_v8_0_setup_rb(adev);
3741         gfx_v8_0_get_cu_info(adev);
3742
3743         /* XXX SH_MEM regs */
3744         /* where to put LDS, scratch, GPUVM in FSA64 space */
3745         mutex_lock(&adev->srbm_mutex);
3746         for (i = 0; i < 16; i++) {
3747                 vi_srbm_select(adev, 0, 0, 0, i);
3748                 /* CP and shaders */
3749                 if (i == 0) {
3750                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3751                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3752                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3753                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3754                         WREG32(mmSH_MEM_CONFIG, tmp);
3755                 } else {
3756                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3757                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3758                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3759                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3760                         WREG32(mmSH_MEM_CONFIG, tmp);
3761                 }
3762
3763                 WREG32(mmSH_MEM_APE1_BASE, 1);
3764                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3765                 WREG32(mmSH_MEM_BASES, 0);
3766         }
3767         vi_srbm_select(adev, 0, 0, 0, 0);
3768         mutex_unlock(&adev->srbm_mutex);
3769
3770         gfx_v8_0_init_compute_vmid(adev);
3771
3772         mutex_lock(&adev->grbm_idx_mutex);
3773         /*
3774          * make sure that the following register writes are broadcast
3775          * to all the shaders
3776          */
3777         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3778
3779         WREG32(mmPA_SC_FIFO_SIZE,
3780                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3781                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3782                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3783                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3784                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3785                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3786                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3787                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3788         mutex_unlock(&adev->grbm_idx_mutex);
3789
3790 }
3791
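     /*
      * Poll until the RLC SERDES CU masters on every SE/SH, and then the
      * non-CU masters, report idle (or the usec timeout expires).
      */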
3792 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3793 {
3794         u32 i, j, k;
3795         u32 mask;
3796
3797         mutex_lock(&adev->grbm_idx_mutex);
3798         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3799                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3800                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3801                         for (k = 0; k < adev->usec_timeout; k++) {
3802                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3803                                         break;
3804                                 udelay(1);
3805                         }
3806                 }
3807         }
3808         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3809         mutex_unlock(&adev->grbm_idx_mutex);
3810
3811         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3812                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3813                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3814                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3815         for (k = 0; k < adev->usec_timeout; k++) {
3816                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3817                         break;
3818                 udelay(1);
3819         }
3820 }
3821
3822 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3823                                                bool enable)
3824 {
3825         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3826
3827         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3828         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3829         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3830         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3831
3832         WREG32(mmCP_INT_CNTL_RING0, tmp);
3833 }
3834
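     /* Point the RLC at the clear state indirect buffer (address and size). */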
3835 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3836 {
3837         /* csib */
3838         WREG32(mmRLC_CSIB_ADDR_HI,
3839                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3840         WREG32(mmRLC_CSIB_ADDR_LO,
3841                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3842         WREG32(mmRLC_CSIB_LENGTH,
3843                         adev->gfx.rlc.clear_state_size);
3844 }
3845
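     /*
      * Walk the RLC indirect register list: record the offset at which each
      * entry starts and replace every register index in the list with its
      * position in the table of unique indices, building that table as we go.
      */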
3846 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3847                                 int ind_offset,
3848                                 int list_size,
3849                                 int *unique_indices,
3850                                 int *indices_count,
3851                                 int max_indices,
3852                                 int *ind_start_offsets,
3853                                 int *offset_count,
3854                                 int max_offset)
3855 {
3856         int indices;
3857         bool new_entry = true;
3858
3859         for (; ind_offset < list_size; ind_offset++) {
3860
3861                 if (new_entry) {
3862                         new_entry = false;
3863                         ind_start_offsets[*offset_count] = ind_offset;
3864                         *offset_count = *offset_count + 1;
3865                         BUG_ON(*offset_count >= max_offset);
3866                 }
3867
3868                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3869                         new_entry = true;
3870                         continue;
3871                 }
3872
3873                 ind_offset += 2;
3874
3875                 /* look for a matching index */
3876                 for (indices = 0;
3877                         indices < *indices_count;
3878                         indices++) {
3879                         if (unique_indices[indices] ==
3880                                 register_list_format[ind_offset])
3881                                 break;
3882                 }
3883
3884                 if (indices >= *indices_count) {
3885                         unique_indices[*indices_count] =
3886                                 register_list_format[ind_offset];
3887                         indices = *indices_count;
3888                         *indices_count = *indices_count + 1;
3889                         BUG_ON(*indices_count >= max_indices);
3890                 }
3891
3892                 register_list_format[ind_offset] = indices;
3893         }
3894 }
3895
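     /*
      * Program the RLC save/restore lists: the direct list goes into ARAM,
      * the indirect list and its starting offsets into GPM scratch, and the
      * unique register indices into the SRM index control registers.
      */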
3896 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3897 {
3898         int i, temp, data;
3899         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3900         int indices_count = 0;
3901         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3902         int offset_count = 0;
3903
3904         int list_size;
3905         unsigned int *register_list_format =
3906                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3907         if (register_list_format == NULL)
3908                 return -ENOMEM;
3909         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3910                         adev->gfx.rlc.reg_list_format_size_bytes);
3911
3912         gfx_v8_0_parse_ind_reg_list(register_list_format,
3913                                 RLC_FormatDirectRegListLength,
3914                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3915                                 unique_indices,
3916                                 &indices_count,
3917                                 ARRAY_SIZE(unique_indices),
3918                                 indirect_start_offsets,
3919                                 &offset_count,
3920                                 ARRAY_SIZE(indirect_start_offsets));
3921
3922         /* save and restore list */
3923         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3924
3925         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3926         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3927                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3928
3929         /* indirect list */
3930         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3931         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3932                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3933
3934         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3935         list_size = list_size >> 1;
3936         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3937         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3938
3939         /* starting offsets */
3940         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3941                 adev->gfx.rlc.starting_offsets_start);
3942         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3943                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3944                                 indirect_start_offsets[i]);
3945
3946         /* unique indices */
3947         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3948         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3949         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
3950                 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3951                 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3952         }
3953         kfree(register_list_format);
3954
3955         return 0;
3956 }
3957
3958 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3959 {
3960         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
3961 }
3962
3963 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3964 {
3965         uint32_t data;
3966
3967         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3968                               AMD_PG_SUPPORT_GFX_SMG |
3969                               AMD_PG_SUPPORT_GFX_DMG)) {
3970                 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
3971
3972                 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
3973                 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
3974                 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
3975                 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
3976                 WREG32(mmRLC_PG_DELAY, data);
3977
3978                 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
3979                 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
3980         }
3981 }
3982
3983 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3984                                                 bool enable)
3985 {
3986         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
3987 }
3988
3989 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3990                                                   bool enable)
3991 {
3992         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
3993 }
3994
3995 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3996 {
3997         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 1 : 0);
3998 }
3999
4000 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4001 {
4002         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
4003                               AMD_PG_SUPPORT_GFX_SMG |
4004                               AMD_PG_SUPPORT_GFX_DMG |
4005                               AMD_PG_SUPPORT_CP |
4006                               AMD_PG_SUPPORT_GDS |
4007                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
4008                 gfx_v8_0_init_csb(adev);
4009                 gfx_v8_0_init_save_restore_list(adev);
4010                 gfx_v8_0_enable_save_restore_machine(adev);
4011
4012                 if ((adev->asic_type == CHIP_CARRIZO) ||
4013                     (adev->asic_type == CHIP_STONEY)) {
4014                         WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4015                         gfx_v8_0_init_power_gating(adev);
4016                         WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4017                         if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4018                                 cz_enable_sck_slow_down_on_power_up(adev, true);
4019                                 cz_enable_sck_slow_down_on_power_down(adev, true);
4020                         } else {
4021                                 cz_enable_sck_slow_down_on_power_up(adev, false);
4022                                 cz_enable_sck_slow_down_on_power_down(adev, false);
4023                         }
4024                         if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4025                                 cz_enable_cp_power_gating(adev, true);
4026                         else
4027                                 cz_enable_cp_power_gating(adev, false);
4028                 } else if (adev->asic_type == CHIP_POLARIS11) {
4029                         gfx_v8_0_init_power_gating(adev);
4030                 }
4031         }
4032 }
4033
4034 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4035 {
4036         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4037
4038         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4039         gfx_v8_0_wait_for_rlc_serdes(adev);
4040 }
4041
4042 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4043 {
4044         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4045         udelay(50);
4046
4047         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4048         udelay(50);
4049 }
4050
4051 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4052 {
4053         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4054
4055         /* on APUs such as carrizo, the cp interrupt is enabled after cp init instead */
4056         if (!(adev->flags & AMD_IS_APU))
4057                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4058
4059         udelay(50);
4060 }
4061
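     /* Write the RLC microcode image word by word through RLC_GPM_UCODE_DATA. */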
4062 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4063 {
4064         const struct rlc_firmware_header_v2_0 *hdr;
4065         const __le32 *fw_data;
4066         unsigned i, fw_size;
4067
4068         if (!adev->gfx.rlc_fw)
4069                 return -EINVAL;
4070
4071         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4072         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4073
4074         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4075                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4076         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4077
4078         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4079         for (i = 0; i < fw_size; i++)
4080                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4081         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4082
4083         return 0;
4084 }
4085
4086 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4087 {
4088         int r;
4089         u32 tmp;
4090
4091         gfx_v8_0_rlc_stop(adev);
4092
4093         /* disable CG */
4094         tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4095         tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4096                  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4097         WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4098         if (adev->asic_type == CHIP_POLARIS11 ||
4099             adev->asic_type == CHIP_POLARIS10) {
4100                 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
4101                 tmp &= ~0x3;
4102                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4103         }
4104
4105         /* disable PG */
4106         WREG32(mmRLC_PG_CNTL, 0);
4107
4108         gfx_v8_0_rlc_reset(adev);
4109         gfx_v8_0_init_pg(adev);
4110
4111         if (!adev->pp_enabled) {
4112                 if (!adev->firmware.smu_load) {
4113                         /* legacy rlc firmware loading */
4114                         r = gfx_v8_0_rlc_load_microcode(adev);
4115                         if (r)
4116                                 return r;
4117                 } else {
4118                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4119                                                         AMDGPU_UCODE_ID_RLC_G);
4120                         if (r)
4121                                 return -EINVAL;
4122                 }
4123         }
4124
4125         gfx_v8_0_rlc_start(adev);
4126
4127         return 0;
4128 }
4129
4130 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4131 {
4132         int i;
4133         u32 tmp = RREG32(mmCP_ME_CNTL);
4134
4135         if (enable) {
4136                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4137                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4138                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4139         } else {
4140                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4141                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4142                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4143                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4144                         adev->gfx.gfx_ring[i].ready = false;
4145         }
4146         WREG32(mmCP_ME_CNTL, tmp);
4147         udelay(50);
4148 }
4149
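/* Legacy CP gfx firmware load: halt the gfx CP and write the PFP, CE and ME ucode images through their UCODE/RAM data ports. */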
4150 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4151 {
4152         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4153         const struct gfx_firmware_header_v1_0 *ce_hdr;
4154         const struct gfx_firmware_header_v1_0 *me_hdr;
4155         const __le32 *fw_data;
4156         unsigned i, fw_size;
4157
4158         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4159                 return -EINVAL;
4160
4161         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4162                 adev->gfx.pfp_fw->data;
4163         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4164                 adev->gfx.ce_fw->data;
4165         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4166                 adev->gfx.me_fw->data;
4167
4168         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4169         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4170         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4171
4172         gfx_v8_0_cp_gfx_enable(adev, false);
4173
4174         /* PFP */
4175         fw_data = (const __le32 *)
4176                 (adev->gfx.pfp_fw->data +
4177                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4178         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4179         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4180         for (i = 0; i < fw_size; i++)
4181                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4182         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4183
4184         /* CE */
4185         fw_data = (const __le32 *)
4186                 (adev->gfx.ce_fw->data +
4187                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4188         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4189         WREG32(mmCP_CE_UCODE_ADDR, 0);
4190         for (i = 0; i < fw_size; i++)
4191                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4192         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4193
4194         /* ME */
4195         fw_data = (const __le32 *)
4196                 (adev->gfx.me_fw->data +
4197                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4198         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4199         WREG32(mmCP_ME_RAM_WADDR, 0);
4200         for (i = 0; i < fw_size; i++)
4201                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4202         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4203
4204         return 0;
4205 }
4206
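/* Number of dwords needed for the clear-state sequence emitted by gfx_v8_0_cp_gfx_start(). */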
4207 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4208 {
4209         u32 count = 0;
4210         const struct cs_section_def *sect = NULL;
4211         const struct cs_extent_def *ext = NULL;
4212
4213         /* begin clear state */
4214         count += 2;
4215         /* context control state */
4216         count += 3;
4217
4218         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4219                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4220                         if (sect->id == SECT_CONTEXT)
4221                                 count += 2 + ext->reg_count;
4222                         else
4223                                 return 0;
4224                 }
4225         }
4226         /* pa_sc_raster_config/pa_sc_raster_config1 */
4227         count += 4;
4228         /* end clear state */
4229         count += 2;
4230         /* clear state */
4231         count += 2;
4232
4233         return count;
4234 }
4235
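/*
 * Initialize the gfx CP and emit the clear-state buffer on gfx ring 0:
 * context control, the SET_CONTEXT_REG extents from vi_cs_data, the
 * per-ASIC PA_SC_RASTER_CONFIG values and the CE partition bases.
 */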
4236 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4237 {
4238         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4239         const struct cs_section_def *sect = NULL;
4240         const struct cs_extent_def *ext = NULL;
4241         int r, i;
4242
4243         /* init the CP */
4244         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4245         WREG32(mmCP_ENDIAN_SWAP, 0);
4246         WREG32(mmCP_DEVICE_ID, 1);
4247
4248         gfx_v8_0_cp_gfx_enable(adev, true);
4249
4250         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4251         if (r) {
4252                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4253                 return r;
4254         }
4255
4256         /* clear state buffer */
4257         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4258         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4259
4260         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4261         amdgpu_ring_write(ring, 0x80000000);
4262         amdgpu_ring_write(ring, 0x80000000);
4263
4264         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4265                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4266                         if (sect->id == SECT_CONTEXT) {
4267                                 amdgpu_ring_write(ring,
4268                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4269                                                ext->reg_count));
4270                                 amdgpu_ring_write(ring,
4271                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4272                                 for (i = 0; i < ext->reg_count; i++)
4273                                         amdgpu_ring_write(ring, ext->extent[i]);
4274                         }
4275                 }
4276         }
4277
4278         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4279         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4280         switch (adev->asic_type) {
4281         case CHIP_TONGA:
4282         case CHIP_POLARIS10:
4283                 amdgpu_ring_write(ring, 0x16000012);
4284                 amdgpu_ring_write(ring, 0x0000002A);
4285                 break;
4286         case CHIP_POLARIS11:
4287                 amdgpu_ring_write(ring, 0x16000012);
4288                 amdgpu_ring_write(ring, 0x00000000);
4289                 break;
4290         case CHIP_FIJI:
4291                 amdgpu_ring_write(ring, 0x3a00161a);
4292                 amdgpu_ring_write(ring, 0x0000002e);
4293                 break;
4294         case CHIP_CARRIZO:
4295                 amdgpu_ring_write(ring, 0x00000002);
4296                 amdgpu_ring_write(ring, 0x00000000);
4297                 break;
4298         case CHIP_TOPAZ:
4299                 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4300                                 0x00000000 : 0x00000002);
4301                 amdgpu_ring_write(ring, 0x00000000);
4302                 break;
4303         case CHIP_STONEY:
4304                 amdgpu_ring_write(ring, 0x00000000);
4305                 amdgpu_ring_write(ring, 0x00000000);
4306                 break;
4307         default:
4308                 BUG();
4309         }
4310
4311         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4312         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4313
4314         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4315         amdgpu_ring_write(ring, 0);
4316
4317         /* init the CE partitions */
4318         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4319         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4320         amdgpu_ring_write(ring, 0x8000);
4321         amdgpu_ring_write(ring, 0x8000);
4322
4323         amdgpu_ring_commit(ring);
4324
4325         return 0;
4326 }
4327
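/*
 * Program the gfx ring buffer (CP_RB0 size, read/write pointers, write-back
 * address, base and optional doorbell), then start and ring-test gfx ring 0.
 */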
4328 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4329 {
4330         struct amdgpu_ring *ring;
4331         u32 tmp;
4332         u32 rb_bufsz;
4333         u64 rb_addr, rptr_addr;
4334         int r;
4335
4336         /* Set the write pointer delay */
4337         WREG32(mmCP_RB_WPTR_DELAY, 0);
4338
4339         /* set the RB to use vmid 0 */
4340         WREG32(mmCP_RB_VMID, 0);
4341
4342         /* Set ring buffer size */
4343         ring = &adev->gfx.gfx_ring[0];
4344         rb_bufsz = order_base_2(ring->ring_size / 8);
4345         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4346         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4347         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4348         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4349 #ifdef __BIG_ENDIAN
4350         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4351 #endif
4352         WREG32(mmCP_RB0_CNTL, tmp);
4353
4354         /* Initialize the ring buffer's read and write pointers */
4355         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4356         ring->wptr = 0;
4357         WREG32(mmCP_RB0_WPTR, ring->wptr);
4358
4359         /* set the wb address whether it's enabled or not */
4360         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4361         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4362         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4363
4364         mdelay(1);
4365         WREG32(mmCP_RB0_CNTL, tmp);
4366
4367         rb_addr = ring->gpu_addr >> 8;
4368         WREG32(mmCP_RB0_BASE, rb_addr);
4369         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4370
4371         /* no gfx doorbells on iceland */
4372         if (adev->asic_type != CHIP_TOPAZ) {
4373                 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4374                 if (ring->use_doorbell) {
4375                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4376                                             DOORBELL_OFFSET, ring->doorbell_index);
4377                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4378                                             DOORBELL_HIT, 0);
4379                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4380                                             DOORBELL_EN, 1);
4381                 } else {
4382                         tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4383                                             DOORBELL_EN, 0);
4384                 }
4385                 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4386
4387                 if (adev->asic_type == CHIP_TONGA) {
4388                         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4389                                             DOORBELL_RANGE_LOWER,
4390                                             AMDGPU_DOORBELL_GFX_RING0);
4391                         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4392
4393                         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4394                                CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4395                 }
4396
4397         }
4398
4399         /* start the ring */
4400         gfx_v8_0_cp_gfx_start(adev);
4401         ring->ready = true;
4402         r = amdgpu_ring_test_ring(ring);
4403         if (r)
4404                 ring->ready = false;
4405
4406         return r;
4407 }
4408
4409 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4410 {
4411         int i;
4412
4413         if (enable) {
4414                 WREG32(mmCP_MEC_CNTL, 0);
4415         } else {
4416                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4417                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4418                         adev->gfx.compute_ring[i].ready = false;
4419         }
4420         udelay(50);
4421 }
4422
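/* Legacy MEC firmware load: halt the MEC, then write the MEC1 (and, if present, MEC2) ucode images. */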
4423 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4424 {
4425         const struct gfx_firmware_header_v1_0 *mec_hdr;
4426         const __le32 *fw_data;
4427         unsigned i, fw_size;
4428
4429         if (!adev->gfx.mec_fw)
4430                 return -EINVAL;
4431
4432         gfx_v8_0_cp_compute_enable(adev, false);
4433
4434         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4435         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4436
4437         fw_data = (const __le32 *)
4438                 (adev->gfx.mec_fw->data +
4439                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4440         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4441
4442         /* MEC1 */
4443         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4444         for (i = 0; i < fw_size; i++)
4445                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4446         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4447
4448         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4449         if (adev->gfx.mec2_fw) {
4450                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4451
4452                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4453                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4454
4455                 fw_data = (const __le32 *)
4456                         (adev->gfx.mec2_fw->data +
4457                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4458                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4459
4460                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4461                 for (i = 0; i < fw_size; i++)
4462                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4463                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4464         }
4465
4466         return 0;
4467 }
4468
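/*
 * Memory queue descriptor (MQD) layout for VI compute queues.  The cp_mqd_
 * and cp_hqd_ fields mirror the corresponding CP registers programmed in
 * gfx_v8_0_cp_compute_resume() below.
 */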
4469 struct vi_mqd {
4470         uint32_t header;  /* ordinal0 */
4471         uint32_t compute_dispatch_initiator;  /* ordinal1 */
4472         uint32_t compute_dim_x;  /* ordinal2 */
4473         uint32_t compute_dim_y;  /* ordinal3 */
4474         uint32_t compute_dim_z;  /* ordinal4 */
4475         uint32_t compute_start_x;  /* ordinal5 */
4476         uint32_t compute_start_y;  /* ordinal6 */
4477         uint32_t compute_start_z;  /* ordinal7 */
4478         uint32_t compute_num_thread_x;  /* ordinal8 */
4479         uint32_t compute_num_thread_y;  /* ordinal9 */
4480         uint32_t compute_num_thread_z;  /* ordinal10 */
4481         uint32_t compute_pipelinestat_enable;  /* ordinal11 */
4482         uint32_t compute_perfcount_enable;  /* ordinal12 */
4483         uint32_t compute_pgm_lo;  /* ordinal13 */
4484         uint32_t compute_pgm_hi;  /* ordinal14 */
4485         uint32_t compute_tba_lo;  /* ordinal15 */
4486         uint32_t compute_tba_hi;  /* ordinal16 */
4487         uint32_t compute_tma_lo;  /* ordinal17 */
4488         uint32_t compute_tma_hi;  /* ordinal18 */
4489         uint32_t compute_pgm_rsrc1;  /* ordinal19 */
4490         uint32_t compute_pgm_rsrc2;  /* ordinal20 */
4491         uint32_t compute_vmid;  /* ordinal21 */
4492         uint32_t compute_resource_limits;  /* ordinal22 */
4493         uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
4494         uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
4495         uint32_t compute_tmpring_size;  /* ordinal25 */
4496         uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
4497         uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
4498         uint32_t compute_restart_x;  /* ordinal28 */
4499         uint32_t compute_restart_y;  /* ordinal29 */
4500         uint32_t compute_restart_z;  /* ordinal30 */
4501         uint32_t compute_thread_trace_enable;  /* ordinal31 */
4502         uint32_t compute_misc_reserved;  /* ordinal32 */
4503         uint32_t compute_dispatch_id;  /* ordinal33 */
4504         uint32_t compute_threadgroup_id;  /* ordinal34 */
4505         uint32_t compute_relaunch;  /* ordinal35 */
4506         uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
4507         uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
4508         uint32_t compute_wave_restore_control;  /* ordinal38 */
4509         uint32_t reserved9;  /* ordinal39 */
4510         uint32_t reserved10;  /* ordinal40 */
4511         uint32_t reserved11;  /* ordinal41 */
4512         uint32_t reserved12;  /* ordinal42 */
4513         uint32_t reserved13;  /* ordinal43 */
4514         uint32_t reserved14;  /* ordinal44 */
4515         uint32_t reserved15;  /* ordinal45 */
4516         uint32_t reserved16;  /* ordinal46 */
4517         uint32_t reserved17;  /* ordinal47 */
4518         uint32_t reserved18;  /* ordinal48 */
4519         uint32_t reserved19;  /* ordinal49 */
4520         uint32_t reserved20;  /* ordinal50 */
4521         uint32_t reserved21;  /* ordinal51 */
4522         uint32_t reserved22;  /* ordinal52 */
4523         uint32_t reserved23;  /* ordinal53 */
4524         uint32_t reserved24;  /* ordinal54 */
4525         uint32_t reserved25;  /* ordinal55 */
4526         uint32_t reserved26;  /* ordinal56 */
4527         uint32_t reserved27;  /* ordinal57 */
4528         uint32_t reserved28;  /* ordinal58 */
4529         uint32_t reserved29;  /* ordinal59 */
4530         uint32_t reserved30;  /* ordinal60 */
4531         uint32_t reserved31;  /* ordinal61 */
4532         uint32_t reserved32;  /* ordinal62 */
4533         uint32_t reserved33;  /* ordinal63 */
4534         uint32_t reserved34;  /* ordinal64 */
4535         uint32_t compute_user_data_0;  /* ordinal65 */
4536         uint32_t compute_user_data_1;  /* ordinal66 */
4537         uint32_t compute_user_data_2;  /* ordinal67 */
4538         uint32_t compute_user_data_3;  /* ordinal68 */
4539         uint32_t compute_user_data_4;  /* ordinal69 */
4540         uint32_t compute_user_data_5;  /* ordinal70 */
4541         uint32_t compute_user_data_6;  /* ordinal71 */
4542         uint32_t compute_user_data_7;  /* ordinal72 */
4543         uint32_t compute_user_data_8;  /* ordinal73 */
4544         uint32_t compute_user_data_9;  /* ordinal74 */
4545         uint32_t compute_user_data_10;  /* ordinal75 */
4546         uint32_t compute_user_data_11;  /* ordinal76 */
4547         uint32_t compute_user_data_12;  /* ordinal77 */
4548         uint32_t compute_user_data_13;  /* ordinal78 */
4549         uint32_t compute_user_data_14;  /* ordinal79 */
4550         uint32_t compute_user_data_15;  /* ordinal80 */
4551         uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
4552         uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
4553         uint32_t reserved35;  /* ordinal83 */
4554         uint32_t reserved36;  /* ordinal84 */
4555         uint32_t reserved37;  /* ordinal85 */
4556         uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
4557         uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
4558         uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
4559         uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
4560         uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
4561         uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
4562         uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
4563         uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
4564         uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
4565         uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
4566         uint32_t reserved38;  /* ordinal96 */
4567         uint32_t reserved39;  /* ordinal97 */
4568         uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
4569         uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
4570         uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
4571         uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
4572         uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
4573         uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
4574         uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
4575         uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
4576         uint32_t reserved40;  /* ordinal106 */
4577         uint32_t reserved41;  /* ordinal107 */
4578         uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
4579         uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
4580         uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
4581         uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
4582         uint32_t reserved42;  /* ordinal112 */
4583         uint32_t reserved43;  /* ordinal113 */
4584         uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
4585         uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
4586         uint32_t cp_packet_id_lo;  /* ordinal116 */
4587         uint32_t cp_packet_id_hi;  /* ordinal117 */
4588         uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
4589         uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
4590         uint32_t gds_save_base_addr_lo;  /* ordinal120 */
4591         uint32_t gds_save_base_addr_hi;  /* ordinal121 */
4592         uint32_t gds_save_mask_lo;  /* ordinal122 */
4593         uint32_t gds_save_mask_hi;  /* ordinal123 */
4594         uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
4595         uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
4596         uint32_t reserved44;  /* ordinal126 */
4597         uint32_t reserved45;  /* ordinal127 */
4598         uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
4599         uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
4600         uint32_t cp_hqd_active;  /* ordinal130 */
4601         uint32_t cp_hqd_vmid;  /* ordinal131 */
4602         uint32_t cp_hqd_persistent_state;  /* ordinal132 */
4603         uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
4604         uint32_t cp_hqd_queue_priority;  /* ordinal134 */
4605         uint32_t cp_hqd_quantum;  /* ordinal135 */
4606         uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
4607         uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
4608         uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
4609         uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
4610         uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
4611         uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
4612         uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
4613         uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
4614         uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
4615         uint32_t cp_hqd_pq_control;  /* ordinal145 */
4616         uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
4617         uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
4618         uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
4619         uint32_t cp_hqd_ib_control;  /* ordinal149 */
4620         uint32_t cp_hqd_iq_timer;  /* ordinal150 */
4621         uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
4622         uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
4623         uint32_t cp_hqd_dma_offload;  /* ordinal153 */
4624         uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
4625         uint32_t cp_hqd_msg_type;  /* ordinal155 */
4626         uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
4627         uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
4628         uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
4629         uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
4630         uint32_t cp_hqd_hq_status0;  /* ordinal160 */
4631         uint32_t cp_hqd_hq_control0;  /* ordinal161 */
4632         uint32_t cp_mqd_control;  /* ordinal162 */
4633         uint32_t cp_hqd_hq_status1;  /* ordinal163 */
4634         uint32_t cp_hqd_hq_control1;  /* ordinal164 */
4635         uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
4636         uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
4637         uint32_t cp_hqd_eop_control;  /* ordinal167 */
4638         uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
4639         uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
4640         uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
4641         uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
4642         uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
4643         uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
4644         uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
4645         uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
4646         uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
4647         uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
4648         uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
4649         uint32_t cp_hqd_error;  /* ordinal179 */
4650         uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
4651         uint32_t cp_hqd_eop_dones;  /* ordinal181 */
4652         uint32_t reserved46;  /* ordinal182 */
4653         uint32_t reserved47;  /* ordinal183 */
4654         uint32_t reserved48;  /* ordinal184 */
4655         uint32_t reserved49;  /* ordinal185 */
4656         uint32_t reserved50;  /* ordinal186 */
4657         uint32_t reserved51;  /* ordinal187 */
4658         uint32_t reserved52;  /* ordinal188 */
4659         uint32_t reserved53;  /* ordinal189 */
4660         uint32_t reserved54;  /* ordinal190 */
4661         uint32_t reserved55;  /* ordinal191 */
4662         uint32_t iqtimer_pkt_header;  /* ordinal192 */
4663         uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
4664         uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
4665         uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
4666         uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
4667         uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
4668         uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
4669         uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
4670         uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
4671         uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
4672         uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
4673         uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
4674         uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
4675         uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
4676         uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
4677         uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
4678         uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
4679         uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
4680         uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
4681         uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
4682         uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
4683         uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
4684         uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
4685         uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
4686         uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
4687         uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
4688         uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
4689         uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
4690         uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
4691         uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
4692         uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
4693         uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
4694         uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
4695         uint32_t reserved56;  /* ordinal225 */
4696         uint32_t reserved57;  /* ordinal226 */
4697         uint32_t reserved58;  /* ordinal227 */
4698         uint32_t set_resources_header;  /* ordinal228 */
4699         uint32_t set_resources_dw1;  /* ordinal229 */
4700         uint32_t set_resources_dw2;  /* ordinal230 */
4701         uint32_t set_resources_dw3;  /* ordinal231 */
4702         uint32_t set_resources_dw4;  /* ordinal232 */
4703         uint32_t set_resources_dw5;  /* ordinal233 */
4704         uint32_t set_resources_dw6;  /* ordinal234 */
4705         uint32_t set_resources_dw7;  /* ordinal235 */
4706         uint32_t reserved59;  /* ordinal236 */
4707         uint32_t reserved60;  /* ordinal237 */
4708         uint32_t reserved61;  /* ordinal238 */
4709         uint32_t reserved62;  /* ordinal239 */
4710         uint32_t reserved63;  /* ordinal240 */
4711         uint32_t reserved64;  /* ordinal241 */
4712         uint32_t reserved65;  /* ordinal242 */
4713         uint32_t reserved66;  /* ordinal243 */
4714         uint32_t reserved67;  /* ordinal244 */
4715         uint32_t reserved68;  /* ordinal245 */
4716         uint32_t reserved69;  /* ordinal246 */
4717         uint32_t reserved70;  /* ordinal247 */
4718         uint32_t reserved71;  /* ordinal248 */
4719         uint32_t reserved72;  /* ordinal249 */
4720         uint32_t reserved73;  /* ordinal250 */
4721         uint32_t reserved74;  /* ordinal251 */
4722         uint32_t reserved75;  /* ordinal252 */
4723         uint32_t reserved76;  /* ordinal253 */
4724         uint32_t reserved77;  /* ordinal254 */
4725         uint32_t reserved78;  /* ordinal255 */
4726
4727         uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
4728 };
4729
4730 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4731 {
4732         int i, r;
4733
4734         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4735                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4736
4737                 if (ring->mqd_obj) {
4738                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4739                         if (unlikely(r != 0))
4740                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4741
4742                         amdgpu_bo_unpin(ring->mqd_obj);
4743                         amdgpu_bo_unreserve(ring->mqd_obj);
4744
4745                         amdgpu_bo_unref(&ring->mqd_obj);
4746                         ring->mqd_obj = NULL;
4747                 }
4748         }
4749 }
4750
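/*
 * Bring up the compute queues: program the per-pipe EOP buffers, allocate
 * and fill an MQD for each compute ring, mirror it into the CP_HQD registers
 * under srbm_mutex, then enable the MEC and ring-test every compute ring.
 */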
4751 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4752 {
4753         int r, i, j;
4754         u32 tmp;
4755         bool use_doorbell = true;
4756         u64 hqd_gpu_addr;
4757         u64 mqd_gpu_addr;
4758         u64 eop_gpu_addr;
4759         u64 wb_gpu_addr;
4760         u32 *buf;
4761         struct vi_mqd *mqd;
4762
4763         /* init the pipes */
4764         mutex_lock(&adev->srbm_mutex);
4765         for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4766                 int me = (i < 4) ? 1 : 2;
4767                 int pipe = (i < 4) ? i : (i - 4);
4768
4769                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4770                 eop_gpu_addr >>= 8;
4771
4772                 vi_srbm_select(adev, me, pipe, 0, 0);
4773
4774                 /* write the EOP addr */
4775                 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4776                 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4777
4778                 /* set the VMID assigned */
4779                 WREG32(mmCP_HQD_VMID, 0);
4780
4781                 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4782                 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4783                 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4784                                     (order_base_2(MEC_HPD_SIZE / 4) - 1));
4785                 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4786         }
4787         vi_srbm_select(adev, 0, 0, 0, 0);
4788         mutex_unlock(&adev->srbm_mutex);
4789
4790         /* init the compute queues */
4791         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4792                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4793
4794                 if (ring->mqd_obj == NULL) {
4795                         r = amdgpu_bo_create(adev,
4796                                              sizeof(struct vi_mqd),
4797                                              PAGE_SIZE, true,
4798                                              AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4799                                              NULL, &ring->mqd_obj);
4800                         if (r) {
4801                                 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4802                                 return r;
4803                         }
4804                 }
4805
4806                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4807                 if (unlikely(r != 0)) {
4808                         gfx_v8_0_cp_compute_fini(adev);
4809                         return r;
4810                 }
4811                 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4812                                   &mqd_gpu_addr);
4813                 if (r) {
4814                         dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4815                         gfx_v8_0_cp_compute_fini(adev);
4816                         return r;
4817                 }
4818                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4819                 if (r) {
4820                         dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4821                         gfx_v8_0_cp_compute_fini(adev);
4822                         return r;
4823                 }
4824
4825                 /* init the mqd struct */
4826                 memset(buf, 0, sizeof(struct vi_mqd));
4827
4828                 mqd = (struct vi_mqd *)buf;
4829                 mqd->header = 0xC0310800;
4830                 mqd->compute_pipelinestat_enable = 0x00000001;
4831                 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4832                 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4833                 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4834                 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4835                 mqd->compute_misc_reserved = 0x00000003;
4836
4837                 mutex_lock(&adev->srbm_mutex);
4838                 vi_srbm_select(adev, ring->me,
4839                                ring->pipe,
4840                                ring->queue, 0);
4841
4842                 /* disable wptr polling */
4843                 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4844                 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4845                 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4846
4847                 mqd->cp_hqd_eop_base_addr_lo =
4848                         RREG32(mmCP_HQD_EOP_BASE_ADDR);
4849                 mqd->cp_hqd_eop_base_addr_hi =
4850                         RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4851
4852                 /* enable doorbell? */
4853                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4854                 if (use_doorbell) {
4855                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4856                 } else {
4857                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4858                 }
4859                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4860                 mqd->cp_hqd_pq_doorbell_control = tmp;
4861
4862                 /* disable the queue if it's active */
4863                 mqd->cp_hqd_dequeue_request = 0;
4864                 mqd->cp_hqd_pq_rptr = 0;
4865                 mqd->cp_hqd_pq_wptr = 0;
4866                 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4867                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4868                         for (j = 0; j < adev->usec_timeout; j++) {
4869                                 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4870                                         break;
4871                                 udelay(1);
4872                         }
4873                         WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4874                         WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4875                         WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4876                 }
4877
4878                 /* set the pointer to the MQD */
4879                 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4880                 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4881                 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4882                 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4883
4884                 /* set MQD vmid to 0 */
4885                 tmp = RREG32(mmCP_MQD_CONTROL);
4886                 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4887                 WREG32(mmCP_MQD_CONTROL, tmp);
4888                 mqd->cp_mqd_control = tmp;
4889
4890                 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4891                 hqd_gpu_addr = ring->gpu_addr >> 8;
4892                 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4893                 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4894                 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4895                 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4896
4897                 /* set up the HQD, this is similar to CP_RB0_CNTL */
4898                 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4899                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4900                                     (order_base_2(ring->ring_size / 4) - 1));
4901                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4902                                ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4903 #ifdef __BIG_ENDIAN
4904                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4905 #endif
4906                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4907                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4908                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4909                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4910                 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4911                 mqd->cp_hqd_pq_control = tmp;
4912
4913                 /* set the wb address whether it's enabled or not */
4914                 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4915                 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4916                 mqd->cp_hqd_pq_rptr_report_addr_hi =
4917                         upper_32_bits(wb_gpu_addr) & 0xffff;
4918                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4919                        mqd->cp_hqd_pq_rptr_report_addr_lo);
4920                 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4921                        mqd->cp_hqd_pq_rptr_report_addr_hi);
4922
4923                 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4924                 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4925                 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4926                 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4927                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4928                 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4929                        mqd->cp_hqd_pq_wptr_poll_addr_hi);
4930
4931                 /* enable the doorbell if requested */
4932                 if (use_doorbell) {
4933                         if ((adev->asic_type == CHIP_CARRIZO) ||
4934                             (adev->asic_type == CHIP_FIJI) ||
4935                             (adev->asic_type == CHIP_STONEY) ||
4936                             (adev->asic_type == CHIP_POLARIS11) ||
4937                             (adev->asic_type == CHIP_POLARIS10)) {
4938                                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4939                                        AMDGPU_DOORBELL_KIQ << 2);
4940                                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4941                                        AMDGPU_DOORBELL_MEC_RING7 << 2);
4942                         }
4943                         tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4944                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4945                                             DOORBELL_OFFSET, ring->doorbell_index);
4946                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4947                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4948                         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4949                         mqd->cp_hqd_pq_doorbell_control = tmp;
4950
4951                 } else {
4952                         mqd->cp_hqd_pq_doorbell_control = 0;
4953                 }
4954                 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4955                        mqd->cp_hqd_pq_doorbell_control);
4956
4957                 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4958                 ring->wptr = 0;
4959                 mqd->cp_hqd_pq_wptr = ring->wptr;
4960                 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4961                 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4962
4963                 /* set the vmid for the queue */
4964                 mqd->cp_hqd_vmid = 0;
4965                 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4966
4967                 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4968                 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4969                 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4970                 mqd->cp_hqd_persistent_state = tmp;
4971                 if (adev->asic_type == CHIP_STONEY ||
4972                         adev->asic_type == CHIP_POLARIS11 ||
4973                         adev->asic_type == CHIP_POLARIS10) {
4974                         tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4975                         tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4976                         WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4977                 }
4978
4979                 /* activate the queue */
4980                 mqd->cp_hqd_active = 1;
4981                 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4982
4983                 vi_srbm_select(adev, 0, 0, 0, 0);
4984                 mutex_unlock(&adev->srbm_mutex);
4985
4986                 amdgpu_bo_kunmap(ring->mqd_obj);
4987                 amdgpu_bo_unreserve(ring->mqd_obj);
4988         }
4989
4990         if (use_doorbell) {
4991                 tmp = RREG32(mmCP_PQ_STATUS);
4992                 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4993                 WREG32(mmCP_PQ_STATUS, tmp);
4994         }
4995
4996         gfx_v8_0_cp_compute_enable(adev, true);
4997
4998         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4999                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5000
5001                 ring->ready = true;
5002                 r = amdgpu_ring_test_ring(ring);
5003                 if (r)
5004                         ring->ready = false;
5005         }
5006
5007         return 0;
5008 }
5009
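/* Load (or verify SMU-loaded) CP firmware, then resume the gfx and compute rings. */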
5010 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5011 {
5012         int r;
5013
5014         if (!(adev->flags & AMD_IS_APU))
5015                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5016
5017         if (!adev->pp_enabled) {
5018                 if (!adev->firmware.smu_load) {
5019                         /* legacy firmware loading */
5020                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
5021                         if (r)
5022                                 return r;
5023
5024                         r = gfx_v8_0_cp_compute_load_microcode(adev);
5025                         if (r)
5026                                 return r;
5027                 } else {
5028                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5029                                                         AMDGPU_UCODE_ID_CP_CE);
5030                         if (r)
5031                                 return -EINVAL;
5032
5033                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5034                                                         AMDGPU_UCODE_ID_CP_PFP);
5035                         if (r)
5036                                 return -EINVAL;
5037
5038                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5039                                                         AMDGPU_UCODE_ID_CP_ME);
5040                         if (r)
5041                                 return -EINVAL;
5042
5043                         if (adev->asic_type == CHIP_TOPAZ) {
5044                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
5045                                 if (r)
5046                                         return r;
5047                         } else {
5048                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5049                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
5050                                 if (r)
5051                                         return -EINVAL;
5052                         }
5053                 }
5054         }
5055
5056         r = gfx_v8_0_cp_gfx_resume(adev);
5057         if (r)
5058                 return r;
5059
5060         r = gfx_v8_0_cp_compute_resume(adev);
5061         if (r)
5062                 return r;
5063
5064         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5065
5066         return 0;
5067 }
5068
5069 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5070 {
5071         gfx_v8_0_cp_gfx_enable(adev, enable);
5072         gfx_v8_0_cp_compute_enable(adev, enable);
5073 }
5074
5075 static int gfx_v8_0_hw_init(void *handle)
5076 {
5077         int r;
5078         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5079
5080         gfx_v8_0_init_golden_registers(adev);
5081         gfx_v8_0_gpu_init(adev);
5082
5083         r = gfx_v8_0_rlc_resume(adev);
5084         if (r)
5085                 return r;
5086
5087         r = gfx_v8_0_cp_resume(adev);
5088
5089         return r;
5090 }
5091
5092 static int gfx_v8_0_hw_fini(void *handle)
5093 {
5094         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5095
5096         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5097         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5098         gfx_v8_0_cp_enable(adev, false);
5099         gfx_v8_0_rlc_stop(adev);
5100         gfx_v8_0_cp_compute_fini(adev);
5101
5102         amdgpu_set_powergating_state(adev,
5103                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5104
5105         return 0;
5106 }
5107
5108 static int gfx_v8_0_suspend(void *handle)
5109 {
5110         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5111
5112         return gfx_v8_0_hw_fini(adev);
5113 }
5114
5115 static int gfx_v8_0_resume(void *handle)
5116 {
5117         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5118
5119         return gfx_v8_0_hw_init(adev);
5120 }
5121
5122 static bool gfx_v8_0_is_idle(void *handle)
5123 {
5124         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5125
5126         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5127                 return false;
5128         else
5129                 return true;
5130 }
5131
5132 static int gfx_v8_0_wait_for_idle(void *handle)
5133 {
5134         unsigned i;
5135         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5136
5137         for (i = 0; i < adev->usec_timeout; i++) {
5138                 if (gfx_v8_0_is_idle(handle))
5139                         return 0;
5140
5141                 udelay(1);
5142         }
5143         return -ETIMEDOUT;
5144 }
5145
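/*
 * Inspect GRBM_STATUS/GRBM_STATUS2/SRBM_STATUS and record the GRBM/SRBM
 * soft-reset bits that would be needed in adev->gfx; returns true when a
 * reset is required.
 */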
5146 static bool gfx_v8_0_check_soft_reset(void *handle)
5147 {
5148         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5149         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5150         u32 tmp;
5151
5152         /* GRBM_STATUS */
5153         tmp = RREG32(mmGRBM_STATUS);
5154         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5155                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5156                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5157                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5158                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5159                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5160                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5161                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5162                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5163                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5164                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5165                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5166                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5167         }
5168
5169         /* GRBM_STATUS2 */
5170         tmp = RREG32(mmGRBM_STATUS2);
5171         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5172                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5173                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5174
5175         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5176             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5177             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5178                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5179                                                 SOFT_RESET_CPF, 1);
5180                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5181                                                 SOFT_RESET_CPC, 1);
5182                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5183                                                 SOFT_RESET_CPG, 1);
5184                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5185                                                 SOFT_RESET_GRBM, 1);
5186         }
5187
5188         /* SRBM_STATUS */
5189         tmp = RREG32(mmSRBM_STATUS);
5190         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5191                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5192                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5193         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5194                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5195                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5196
5197         if (grbm_soft_reset || srbm_soft_reset) {
5198                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5199                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5200                 return true;
5201         } else {
5202                 adev->gfx.grbm_soft_reset = 0;
5203                 adev->gfx.srbm_soft_reset = 0;
5204                 return false;
5205         }
5206 }
5207
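/* Issue a dequeue request for the ring's HQD (if active) and wait for it to go idle. */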
5208 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
5209                                   struct amdgpu_ring *ring)
5210 {
5211         int i;
5212
5213         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5214         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
5215                 u32 tmp;
5216                 tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
5217                 tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
5218                                     DEQUEUE_REQ, 2);
5219                 WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
5220                 for (i = 0; i < adev->usec_timeout; i++) {
5221                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
5222                                 break;
5223                         udelay(1);
5224                 }
5225         }
5226 }
5227
5228 static int gfx_v8_0_pre_soft_reset(void *handle)
5229 {
5230         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5231         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5232
5233         if ((!adev->gfx.grbm_soft_reset) &&
5234             (!adev->gfx.srbm_soft_reset))
5235                 return 0;
5236
5237         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5238         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5239
5240         /* stop the rlc */
5241         gfx_v8_0_rlc_stop(adev);
5242
5243         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5244             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5245                 /* Disable GFX parsing/prefetching */
5246                 gfx_v8_0_cp_gfx_enable(adev, false);
5247
5248         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5249             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5250             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5251             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5252                 int i;
5253
5254                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5255                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5256
5257                         gfx_v8_0_inactive_hqd(adev, ring);
5258                 }
5259                 /* Disable MEC parsing/prefetching */
5260                 gfx_v8_0_cp_compute_enable(adev, false);
5261         }
5262
5263         return 0;
5264 }
5265
5266 static int gfx_v8_0_soft_reset(void *handle)
5267 {
5268         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5269         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5270         u32 tmp;
5271
5272         if ((!adev->gfx.grbm_soft_reset) &&
5273             (!adev->gfx.srbm_soft_reset))
5274                 return 0;
5275
5276         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5277         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5278
5279         if (grbm_soft_reset || srbm_soft_reset) {
5280                 tmp = RREG32(mmGMCON_DEBUG);
5281                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5282                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5283                 WREG32(mmGMCON_DEBUG, tmp);
5284                 udelay(50);
5285         }
5286
5287         if (grbm_soft_reset) {
5288                 tmp = RREG32(mmGRBM_SOFT_RESET);
5289                 tmp |= grbm_soft_reset;
5290                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5291                 WREG32(mmGRBM_SOFT_RESET, tmp);
5292                 tmp = RREG32(mmGRBM_SOFT_RESET);
5293
5294                 udelay(50);
5295
5296                 tmp &= ~grbm_soft_reset;
5297                 WREG32(mmGRBM_SOFT_RESET, tmp);
5298                 tmp = RREG32(mmGRBM_SOFT_RESET);
5299         }
5300
5301         if (srbm_soft_reset) {
5302                 tmp = RREG32(mmSRBM_SOFT_RESET);
5303                 tmp |= srbm_soft_reset;
5304                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5305                 WREG32(mmSRBM_SOFT_RESET, tmp);
5306                 tmp = RREG32(mmSRBM_SOFT_RESET);
5307
5308                 udelay(50);
5309
5310                 tmp &= ~srbm_soft_reset;
5311                 WREG32(mmSRBM_SOFT_RESET, tmp);
5312                 tmp = RREG32(mmSRBM_SOFT_RESET);
5313         }
5314
5315         if (grbm_soft_reset || srbm_soft_reset) {
5316                 tmp = RREG32(mmGMCON_DEBUG);
5317                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5318                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5319                 WREG32(mmGMCON_DEBUG, tmp);
5320         }
5321
5322         /* Wait a little for things to settle down */
5323         udelay(50);
5324
5325         return 0;
5326 }
5327
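     /* Clear a queue's HQD dequeue request and ring pointers via SRBM. */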
5328 static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
5329                               struct amdgpu_ring *ring)
5330 {
5331         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5332         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
5333         WREG32(mmCP_HQD_PQ_RPTR, 0);
5334         WREG32(mmCP_HQD_PQ_WPTR, 0);
5335         vi_srbm_select(adev, 0, 0, 0, 0);
5336 }
5337
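     /*
      * Bring the block back up after a soft reset: resume the GFX and/or
      * compute rings that were halted in pre_soft_reset, reinitialize the
      * compute HQDs and restart the RLC.
      */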
5338 static int gfx_v8_0_post_soft_reset(void *handle)
5339 {
5340         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5341         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5342
5343         if ((!adev->gfx.grbm_soft_reset) &&
5344             (!adev->gfx.srbm_soft_reset))
5345                 return 0;
5346
5347         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5348         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5349
5350         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5351             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5352                 gfx_v8_0_cp_gfx_resume(adev);
5353
5354         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5355             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5356             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5357             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5358                 int i;
5359
5360                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5361                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5362
5363                         gfx_v8_0_init_hqd(adev, ring);
5364                 }
5365                 gfx_v8_0_cp_compute_resume(adev);
5366         }
5367         gfx_v8_0_rlc_start(adev);
5368
5369         return 0;
5370 }
5371
5372 /**
5373  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5374  *
5375  * @adev: amdgpu_device pointer
5376  *
5377  * Fetches a GPU clock counter snapshot.
5378  * Returns the 64 bit clock counter snapshot.
5379  */
5380 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5381 {
5382         uint64_t clock;
5383
5384         mutex_lock(&adev->gfx.gpu_clock_mutex);
5385         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5386         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5387                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5388         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5389         return clock;
5390 }
5391
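     /*
      * Emit WRITE_DATA packets that program the GDS memory base and size,
      * the GWS allocation and the OA mask for the given VMID.
      */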
5392 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5393                                           uint32_t vmid,
5394                                           uint32_t gds_base, uint32_t gds_size,
5395                                           uint32_t gws_base, uint32_t gws_size,
5396                                           uint32_t oa_base, uint32_t oa_size)
5397 {
5398         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5399         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5400
5401         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5402         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5403
5404         oa_base = oa_base >> AMDGPU_OA_SHIFT;
5405         oa_size = oa_size >> AMDGPU_OA_SHIFT;
5406
5407         /* GDS Base */
5408         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5409         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5410                                 WRITE_DATA_DST_SEL(0)));
5411         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5412         amdgpu_ring_write(ring, 0);
5413         amdgpu_ring_write(ring, gds_base);
5414
5415         /* GDS Size */
5416         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5417         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5418                                 WRITE_DATA_DST_SEL(0)));
5419         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5420         amdgpu_ring_write(ring, 0);
5421         amdgpu_ring_write(ring, gds_size);
5422
5423         /* GWS */
5424         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5425         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5426                                 WRITE_DATA_DST_SEL(0)));
5427         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5428         amdgpu_ring_write(ring, 0);
5429         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5430
5431         /* OA */
5432         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5433         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5434                                 WRITE_DATA_DST_SEL(0)));
5435         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5436         amdgpu_ring_write(ring, 0);
5437         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5438 }
5439
5440 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5441         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5442         .select_se_sh = &gfx_v8_0_select_se_sh,
5443 };
5444
5445 static int gfx_v8_0_early_init(void *handle)
5446 {
5447         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5448
5449         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5450         adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5451         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5452         gfx_v8_0_set_ring_funcs(adev);
5453         gfx_v8_0_set_irq_funcs(adev);
5454         gfx_v8_0_set_gds_init(adev);
5455         gfx_v8_0_set_rlc_funcs(adev);
5456
5457         return 0;
5458 }
5459
5460 static int gfx_v8_0_late_init(void *handle)
5461 {
5462         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5463         int r;
5464
5465         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5466         if (r)
5467                 return r;
5468
5469         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5470         if (r)
5471                 return r;
5472
5473         /* requires IBs so do in late init after IB pool is initialized */
5474         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5475         if (r)
5476                 return r;
5477
5478         amdgpu_set_powergating_state(adev,
5479                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5480
5481         return 0;
5482 }
5483
5484 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5485                                                        bool enable)
5486 {
5487         if (adev->asic_type == CHIP_POLARIS11)
5488                 /* Send msg to SMU via Powerplay */
5489                 amdgpu_set_powergating_state(adev,
5490                                              AMD_IP_BLOCK_TYPE_SMC,
5491                                              enable ?
5492                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5493
5494         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5495 }
5496
5497 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5498                                                         bool enable)
5499 {
5500         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5501 }
5502
5503 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5504                 bool enable)
5505 {
5506         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5507 }
5508
5509 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5510                                           bool enable)
5511 {
5512         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5513 }
5514
5515 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5516                                                 bool enable)
5517 {
5518         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5519
5520         /* Read any GFX register to wake up GFX. */
5521         if (!enable)
5522                 RREG32(mmDB_RENDER_CONTROL);
5523 }
5524
5525 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5526                                           bool enable)
5527 {
5528         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5529                 cz_enable_gfx_cg_power_gating(adev, true);
5530                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5531                         cz_enable_gfx_pipeline_power_gating(adev, true);
5532         } else {
5533                 cz_enable_gfx_cg_power_gating(adev, false);
5534                 cz_enable_gfx_pipeline_power_gating(adev, false);
5535         }
5536 }
5537
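     /*
      * Route a powergating state change to the GFX powergating features
      * (CG PG, static/dynamic/quick medium grain PG) supported by the ASIC.
      */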
5538 static int gfx_v8_0_set_powergating_state(void *handle,
5539                                           enum amd_powergating_state state)
5540 {
5541         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5542         bool enable = (state == AMD_PG_STATE_GATE);
5543
5544         if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5545                 return 0;
5546
5547         switch (adev->asic_type) {
5548         case CHIP_CARRIZO:
5549         case CHIP_STONEY:
5550                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5551                         cz_update_gfx_cg_power_gating(adev, enable);
5552
5553                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5554                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5555                 else
5556                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5557
5558                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5559                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5560                 else
5561                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5562                 break;
5563         case CHIP_POLARIS11:
5564                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5565                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5566                 else
5567                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5568
5569                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5570                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5571                 else
5572                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5573
5574                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5575                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5576                 else
5577                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5578                 break;
5579         default:
5580                 break;
5581         }
5582
5583         return 0;
5584 }
5585
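     /*
      * Broadcast a BPM serdes command: select all SEs/SHs, address every
      * CU and non-CU master, and program RLC_SERDES_WR_CTRL with the given
      * register address and command.
      */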
5586 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5587                                      uint32_t reg_addr, uint32_t cmd)
5588 {
5589         uint32_t data;
5590
5591         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5592
5593         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5594         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5595
5596         data = RREG32(mmRLC_SERDES_WR_CTRL);
5597         if (adev->asic_type == CHIP_STONEY)
5598                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5599                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5600                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5601                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5602                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5603                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5604                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5605                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5606                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5607         else
5608                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5609                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5610                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5611                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5612                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5613                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5614                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5615                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5616                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5617                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5618                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5619         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5620                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5621                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5622                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5623
5624         WREG32(mmRLC_SERDES_WR_CTRL, data);
5625 }
5626
5627 #define MSG_ENTER_RLC_SAFE_MODE     1
5628 #define MSG_EXIT_RLC_SAFE_MODE      0
5629 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5630 #define RLC_GPR_REG2__REQ__SHIFT 0
5631 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5632 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5633
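     /*
      * If the RLC is running and any GFX clock/power gating feature is
      * enabled, request RLC safe mode through RLC_GPR_REG2 and wait for
      * both the GFX clock/power status and the request acknowledge.
      */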
5634 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5635 {
5636         u32 data = 0;
5637         unsigned i;
5638
5639         data = RREG32(mmRLC_CNTL);
5640         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5641                 return;
5642
5643         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5644             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5645                                AMD_PG_SUPPORT_GFX_DMG))) {
5646                 data |= RLC_GPR_REG2__REQ_MASK;
5647                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5648                 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5649                 WREG32(mmRLC_GPR_REG2, data);
5650
5651                 for (i = 0; i < adev->usec_timeout; i++) {
5652                         if ((RREG32(mmRLC_GPM_STAT) &
5653                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5654                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5655                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5656                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5657                                 break;
5658                         udelay(1);
5659                 }
5660
5661                 for (i = 0; i < adev->usec_timeout; i++) {
5662                         if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5663                                 break;
5664                         udelay(1);
5665                 }
5666                 adev->gfx.rlc.in_safe_mode = true;
5667         }
5668 }
5669
5670 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5671 {
5672         u32 data;
5673         unsigned i;
5674
5675         data = RREG32(mmRLC_CNTL);
5676         if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5677                 return;
5678
5679         if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5680             (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5681                                AMD_PG_SUPPORT_GFX_DMG))) {
5682                 data |= RLC_GPR_REG2__REQ_MASK;
5683                 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5684                 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5685                 WREG32(mmRLC_GPR_REG2, data);
5686                 adev->gfx.rlc.in_safe_mode = false;
5687         }
5688
5689         for (i = 0; i < adev->usec_timeout; i++) {
5690                 if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
5691                         break;
5692                 udelay(1);
5693         }
5694 }
5695
5696 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5697 {
5698         u32 data;
5699         unsigned i;
5700
5701         data = RREG32(mmRLC_CNTL);
5702         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5703                 return;
5704
5705         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5706                 data |= RLC_SAFE_MODE__CMD_MASK;
5707                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5708                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5709                 WREG32(mmRLC_SAFE_MODE, data);
5710
5711                 for (i = 0; i < adev->usec_timeout; i++) {
5712                         if ((RREG32(mmRLC_GPM_STAT) &
5713                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5714                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5715                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5716                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5717                                 break;
5718                         udelay(1);
5719                 }
5720
5721                 for (i = 0; i < adev->usec_timeout; i++) {
5722                         if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5723                                 break;
5724                         udelay(1);
5725                 }
5726                 adev->gfx.rlc.in_safe_mode = true;
5727         }
5728 }
5729
5730 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5731 {
5732         u32 data = 0;
5733         unsigned i;
5734
5735         data = RREG32(mmRLC_CNTL);
5736         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5737                 return;
5738
5739         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5740                 if (adev->gfx.rlc.in_safe_mode) {
5741                         data |= RLC_SAFE_MODE__CMD_MASK;
5742                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5743                         WREG32(mmRLC_SAFE_MODE, data);
5744                         adev->gfx.rlc.in_safe_mode = false;
5745                 }
5746         }
5747
5748         for (i = 0; i < adev->usec_timeout; i++) {
5749                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5750                         break;
5751                 udelay(1);
5752         }
5753 }
5754
5755 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5756 {
5757         adev->gfx.rlc.in_safe_mode = true;
5758 }
5759
5760 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5761 {
5762         adev->gfx.rlc.in_safe_mode = false;
5763 }
5764
5765 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5766         .enter_safe_mode = cz_enter_rlc_safe_mode,
5767         .exit_safe_mode = cz_exit_rlc_safe_mode
5768 };
5769
5770 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5771         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5772         .exit_safe_mode = iceland_exit_rlc_safe_mode
5773 };
5774
5775 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5776         .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5777         .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5778 };
5779
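     /*
      * Enable or disable medium grain clock gating and the related memory
      * light sleep / CGTS features, following the numbered steps below
      * with the RLC held in safe mode.
      */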
5780 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5781                                                       bool enable)
5782 {
5783         uint32_t temp, data;
5784
5785         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5786
5787         /* It is disabled by HW by default */
5788         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5789                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5790                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5791                                 /* 1 - RLC memory Light sleep */
5792                                 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5793
5794                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5795                                 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5796                 }
5797
5798                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5799                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5800                 if (adev->flags & AMD_IS_APU)
5801                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5802                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5803                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5804                 else
5805                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5806                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5807                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5808                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5809
5810                 if (temp != data)
5811                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5812
5813                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5814                 gfx_v8_0_wait_for_rlc_serdes(adev);
5815
5816                 /* 5 - clear mgcg override */
5817                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5818
5819                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5820                         /* 6 - Enable CGTS(Tree Shade) MGCG/MGLS */
5821                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5822                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5823                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5824                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5825                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5826                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5827                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5828                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5829                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5830                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5831                         if (temp != data)
5832                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5833                 }
5834                 udelay(50);
5835
5836                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5837                 gfx_v8_0_wait_for_rlc_serdes(adev);
5838         } else {
5839                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5840                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5841                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5842                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5843                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5844                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5845                 if (temp != data)
5846                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5847
5848                 /* 2 - disable MGLS in RLC */
5849                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5850                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5851                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5852                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5853                 }
5854
5855                 /* 3 - disable MGLS in CP */
5856                 data = RREG32(mmCP_MEM_SLP_CNTL);
5857                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5858                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5859                         WREG32(mmCP_MEM_SLP_CNTL, data);
5860                 }
5861
5862                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5863                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5864                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5865                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5866                 if (temp != data)
5867                         WREG32(mmCGTS_SM_CTRL_REG, data);
5868
5869                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5870                 gfx_v8_0_wait_for_rlc_serdes(adev);
5871
5872                 /* 6 - set mgcg override */
5873                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5874
5875                 udelay(50);
5876
5877                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5878                 gfx_v8_0_wait_for_rlc_serdes(adev);
5879         }
5880
5881         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5882 }
5883
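     /*
      * Enable or disable coarse grain clock gating (CGCG/CGLS) through the
      * RLC MGCG override register and serdes commands, with the RLC held
      * in safe mode for the whole sequence.
      */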
5884 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5885                                                       bool enable)
5886 {
5887         uint32_t temp, temp1, data, data1;
5888
5889         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5890
5891         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5892
5893         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5894                 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
5895                  * Cmp_busy/GFX_Idle interrupts
5896                  */
5897                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5898
5899                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5900                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5901                 if (temp1 != data1)
5902                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5903
5904                 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5905                 gfx_v8_0_wait_for_rlc_serdes(adev);
5906
5907                 /* 3 - clear cgcg override */
5908                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5909
5910                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5911                 gfx_v8_0_wait_for_rlc_serdes(adev);
5912
5913                 /* 4 - write cmd to set CGLS */
5914                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5915
5916                 /* 5 - enable cgcg */
5917                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5918
5919                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5920                         /* enable cgls*/
5921                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5922
5923                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5924                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5925
5926                         if (temp1 != data1)
5927                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5928                 } else {
5929                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5930                 }
5931
5932                 if (temp != data)
5933                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5934         } else {
5935                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5936                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5937
5938                 /* TEST CGCG */
5939                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5940                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5941                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5942                 if (temp1 != data1)
5943                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5944
5945                 /* read gfx register to wake up cgcg */
5946                 RREG32(mmCB_CGTT_SCLK_CTRL);
5947                 RREG32(mmCB_CGTT_SCLK_CTRL);
5948                 RREG32(mmCB_CGTT_SCLK_CTRL);
5949                 RREG32(mmCB_CGTT_SCLK_CTRL);
5950
5951                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5952                 gfx_v8_0_wait_for_rlc_serdes(adev);
5953
5954                 /* write cmd to set CGCG override */
5955                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5956
5957                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5958                 gfx_v8_0_wait_for_rlc_serdes(adev);
5959
5960                 /* write cmd to Clear CGLS */
5961                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5962
5963                 /* disable cgcg, cgls should be disabled too. */
5964                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5965                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5966                 if (temp != data)
5967                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5968         }
5969
5970         gfx_v8_0_wait_for_rlc_serdes(adev);
5971
5972         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5973 }
5974 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5975                                             bool enable)
5976 {
5977         if (enable) {
5978                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5979                  * ===  MGCG + MGLS + TS(CG/LS) ===
5980                  */
5981                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5982                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5983         } else {
5984                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5985                  * ===  CGCG + CGLS ===
5986                  */
5987                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5988                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5989         }
5990         return 0;
5991 }
5992
5993 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5994                                           enum amd_clockgating_state state)
5995 {
5996         uint32_t msg_id, pp_state;
5997         void *pp_handle = adev->powerplay.pp_handle;
5998
5999         if (state == AMD_CG_STATE_UNGATE)
6000                 pp_state = 0;
6001         else
6002                 pp_state = PP_STATE_CG | PP_STATE_LS;
6003
6004         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6005                         PP_BLOCK_GFX_CG,
6006                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6007                         pp_state);
6008         amd_set_clockgating_by_smu(pp_handle, msg_id);
6009
6010         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6011                         PP_BLOCK_GFX_MG,
6012                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6013                         pp_state);
6014         amd_set_clockgating_by_smu(pp_handle, msg_id);
6015
6016         return 0;
6017 }
6018
6019 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6020                                           enum amd_clockgating_state state)
6021 {
6022         uint32_t msg_id, pp_state;
6023         void *pp_handle = adev->powerplay.pp_handle;
6024
6025         if (state == AMD_CG_STATE_UNGATE)
6026                 pp_state = 0;
6027         else
6028                 pp_state = PP_STATE_CG | PP_STATE_LS;
6029
6030         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6031                         PP_BLOCK_GFX_CG,
6032                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6033                         pp_state);
6034         amd_set_clockgating_by_smu(pp_handle, msg_id);
6035
6036         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6037                         PP_BLOCK_GFX_3D,
6038                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6039                         pp_state);
6040         amd_set_clockgating_by_smu(pp_handle, msg_id);
6041
6042         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6043                         PP_BLOCK_GFX_MG,
6044                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6045                         pp_state);
6046         amd_set_clockgating_by_smu(pp_handle, msg_id);
6047
6048         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6049                         PP_BLOCK_GFX_RLC,
6050                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6051                         pp_state);
6052         amd_set_clockgating_by_smu(pp_handle, msg_id);
6053
6054         msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6055                         PP_BLOCK_GFX_CP,
6056                         PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
6057                         pp_state);
6058         amd_set_clockgating_by_smu(pp_handle, msg_id);
6059
6060         return 0;
6061 }
6062
6063 static int gfx_v8_0_set_clockgating_state(void *handle,
6064                                           enum amd_clockgating_state state)
6065 {
6066         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6067
6068         switch (adev->asic_type) {
6069         case CHIP_FIJI:
6070         case CHIP_CARRIZO:
6071         case CHIP_STONEY:
6072                 gfx_v8_0_update_gfx_clock_gating(adev,
6073                                                  state == AMD_CG_STATE_GATE);
6074                 break;
6075         case CHIP_TONGA:
6076                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6077                 break;
6078         case CHIP_POLARIS10:
6079         case CHIP_POLARIS11:
6080                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6081                 break;
6082         default:
6083                 break;
6084         }
6085         return 0;
6086 }
6087
6088 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6089 {
6090         return ring->adev->wb.wb[ring->rptr_offs];
6091 }
6092
6093 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6094 {
6095         struct amdgpu_device *adev = ring->adev;
6096
6097         if (ring->use_doorbell)
6098                 /* XXX check if swapping is necessary on BE */
6099                 return ring->adev->wb.wb[ring->wptr_offs];
6100         else
6101                 return RREG32(mmCP_RB0_WPTR);
6102 }
6103
6104 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6105 {
6106         struct amdgpu_device *adev = ring->adev;
6107
6108         if (ring->use_doorbell) {
6109                 /* XXX check if swapping is necessary on BE */
6110                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
6111                 WDOORBELL32(ring->doorbell_index, ring->wptr);
6112         } else {
6113                 WREG32(mmCP_RB0_WPTR, ring->wptr);
6114                 (void)RREG32(mmCP_RB0_WPTR);
6115         }
6116 }
6117
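     /*
      * Emit a WAIT_REG_MEM packet that requests an HDP flush through
      * GPU_HDP_FLUSH_REQ and polls GPU_HDP_FLUSH_DONE until the bit for
      * this ring's CP engine is set.
      */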
6118 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6119 {
6120         u32 ref_and_mask, reg_mem_engine;
6121
6122         if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
6123                 switch (ring->me) {
6124                 case 1:
6125                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6126                         break;
6127                 case 2:
6128                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6129                         break;
6130                 default:
6131                         return;
6132                 }
6133                 reg_mem_engine = 0;
6134         } else {
6135                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6136                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6137         }
6138
6139         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6140         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6141                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
6142                                  reg_mem_engine));
6143         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6144         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6145         amdgpu_ring_write(ring, ref_and_mask);
6146         amdgpu_ring_write(ring, ref_and_mask);
6147         amdgpu_ring_write(ring, 0x20); /* poll interval */
6148 }
6149
6150 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6151 {
6152         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6153         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6154                                  WRITE_DATA_DST_SEL(0) |
6155                                  WR_CONFIRM));
6156         amdgpu_ring_write(ring, mmHDP_DEBUG0);
6157         amdgpu_ring_write(ring, 0);
6158         amdgpu_ring_write(ring, 1);
6159
6160 }
6161
6162 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6163                                       struct amdgpu_ib *ib,
6164                                       unsigned vm_id, bool ctx_switch)
6165 {
6166         u32 header, control = 0;
6167
6168         if (ib->flags & AMDGPU_IB_FLAG_CE)
6169                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6170         else
6171                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6172
6173         control |= ib->length_dw | (vm_id << 24);
6174
6175         amdgpu_ring_write(ring, header);
6176         amdgpu_ring_write(ring,
6177 #ifdef __BIG_ENDIAN
6178                           (2 << 0) |
6179 #endif
6180                           (ib->gpu_addr & 0xFFFFFFFC));
6181         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6182         amdgpu_ring_write(ring, control);
6183 }
6184
6185 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6186                                           struct amdgpu_ib *ib,
6187                                           unsigned vm_id, bool ctx_switch)
6188 {
6189         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6190
6191         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6192         amdgpu_ring_write(ring,
6193 #ifdef __BIG_ENDIAN
6194                                 (2 << 0) |
6195 #endif
6196                                 (ib->gpu_addr & 0xFFFFFFFC));
6197         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6198         amdgpu_ring_write(ring, control);
6199 }
6200
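     /*
      * Emit an EVENT_WRITE_EOP fence on the gfx ring: flush the TC/TCL1
      * caches, write the sequence number to the given address and, if
      * requested by the flags, raise an interrupt.
      */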
6201 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6202                                          u64 seq, unsigned flags)
6203 {
6204         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6205         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6206
6207         /* EVENT_WRITE_EOP - flush caches, send int */
6208         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6209         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6210                                  EOP_TC_ACTION_EN |
6211                                  EOP_TC_WB_ACTION_EN |
6212                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6213                                  EVENT_INDEX(5)));
6214         amdgpu_ring_write(ring, addr & 0xfffffffc);
6215         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6216                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6217         amdgpu_ring_write(ring, lower_32_bits(seq));
6218         amdgpu_ring_write(ring, upper_32_bits(seq));
6219
6220 }
6221
6222 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6223 {
6224         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6225         uint32_t seq = ring->fence_drv.sync_seq;
6226         uint64_t addr = ring->fence_drv.gpu_addr;
6227
6228         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6229         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6230                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6231                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6232         amdgpu_ring_write(ring, addr & 0xfffffffc);
6233         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6234         amdgpu_ring_write(ring, seq);
6235         amdgpu_ring_write(ring, 0xffffffff);
6236         amdgpu_ring_write(ring, 4); /* poll interval */
6237 }
6238
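     /*
      * Emit a VM flush: update the page table base address for the VMID,
      * invalidate the VM caches through VM_INVALIDATE_REQUEST and wait for
      * the invalidate to complete; on the gfx ring, pad with NOPs around
      * the flush and sync the PFP to the ME.
      */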
6239 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6240                                         unsigned vm_id, uint64_t pd_addr)
6241 {
6242         int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6243
6244         /* GFX8 emits 128 dw nop to prevent DE from doing vm_flush before CE finishes the CEIB */
6245         if (usepfp)
6246                 amdgpu_ring_insert_nop(ring, 128);
6247
6248         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6249         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6250                                  WRITE_DATA_DST_SEL(0)) |
6251                                  WR_CONFIRM);
6252         if (vm_id < 8) {
6253                 amdgpu_ring_write(ring,
6254                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6255         } else {
6256                 amdgpu_ring_write(ring,
6257                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6258         }
6259         amdgpu_ring_write(ring, 0);
6260         amdgpu_ring_write(ring, pd_addr >> 12);
6261
6262         /* bits 0-15 are the VM contexts0-15 */
6263         /* invalidate the cache */
6264         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6265         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6266                                  WRITE_DATA_DST_SEL(0)));
6267         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6268         amdgpu_ring_write(ring, 0);
6269         amdgpu_ring_write(ring, 1 << vm_id);
6270
6271         /* wait for the invalidate to complete */
6272         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6273         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6274                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6275                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6276         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6277         amdgpu_ring_write(ring, 0);
6278         amdgpu_ring_write(ring, 0); /* ref */
6279         amdgpu_ring_write(ring, 0); /* mask */
6280         amdgpu_ring_write(ring, 0x20); /* poll interval */
6281
6282         /* compute doesn't have PFP */
6283         if (usepfp) {
6284                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6285                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6286                 amdgpu_ring_write(ring, 0x0);
6287                 /* GFX8 emits 128 dw nop to prevent CE from accessing VM before vm_flush finishes */
6288                 amdgpu_ring_insert_nop(ring, 128);
6289         }
6290 }
6291
6292 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6293 {
6294         return ring->adev->wb.wb[ring->wptr_offs];
6295 }
6296
6297 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6298 {
6299         struct amdgpu_device *adev = ring->adev;
6300
6301         /* XXX check if swapping is necessary on BE */
6302         adev->wb.wb[ring->wptr_offs] = ring->wptr;
6303         WDOORBELL32(ring->doorbell_index, ring->wptr);
6304 }
6305
6306 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6307                                              u64 addr, u64 seq,
6308                                              unsigned flags)
6309 {
6310         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6311         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6312
6313         /* RELEASE_MEM - flush caches, send int */
6314         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6315         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6316                                  EOP_TC_ACTION_EN |
6317                                  EOP_TC_WB_ACTION_EN |
6318                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6319                                  EVENT_INDEX(5)));
6320         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6321         amdgpu_ring_write(ring, addr & 0xfffffffc);
6322         amdgpu_ring_write(ring, upper_32_bits(addr));
6323         amdgpu_ring_write(ring, lower_32_bits(seq));
6324         amdgpu_ring_write(ring, upper_32_bits(seq));
6325 }
6326
6327 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6328 {
6329         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6330         amdgpu_ring_write(ring, 0);
6331 }
6332
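     /*
      * Emit a CONTEXT_CONTROL packet whose load bits depend on whether the
      * submission contains a context switch and/or a preamble IB.
      */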
6333 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6334 {
6335         uint32_t dw2 = 0;
6336
6337         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
6338         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6339                 /* set load_global_config & load_global_uconfig */
6340                 dw2 |= 0x8001;
6341                 /* set load_cs_sh_regs */
6342                 dw2 |= 0x01000000;
6343                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6344                 dw2 |= 0x10002;
6345
6346                 /* set load_ce_ram if a preamble is presented */
6347                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6348                         dw2 |= 0x10000000;
6349         } else {
6350                 /* still load_ce_ram if this is the first time the preamble is
6351                  * presented, even though no context switch happens.
6352                  */
6353                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6354                         dw2 |= 0x10000000;
6355         }
6356
6357         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6358         amdgpu_ring_write(ring, dw2);
6359         amdgpu_ring_write(ring, 0);
6360 }
6361
6362 static unsigned gfx_v8_0_ring_get_emit_ib_size_gfx(struct amdgpu_ring *ring)
6363 {
6364         return
6365                 4; /* gfx_v8_0_ring_emit_ib_gfx */
6366 }
6367
6368 static unsigned gfx_v8_0_ring_get_dma_frame_size_gfx(struct amdgpu_ring *ring)
6369 {
6370         return
6371                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6372                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6373                 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6374                 6 + 6 + 6 + /* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
6375                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6376                 256 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
6377                 2 + /* gfx_v8_ring_emit_sb */
6378                 3; /* gfx_v8_ring_emit_cntxcntl */
6379 }
6380
6381 static unsigned gfx_v8_0_ring_get_emit_ib_size_compute(struct amdgpu_ring *ring)
6382 {
6383         return
6384                 4; /* gfx_v8_0_ring_emit_ib_compute */
6385 }
6386
6387 static unsigned gfx_v8_0_ring_get_dma_frame_size_compute(struct amdgpu_ring *ring)
6388 {
6389         return
6390                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6391                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6392                 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6393                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6394                 17 + /* gfx_v8_0_ring_emit_vm_flush */
6395                 7 + 7 + 7; /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6396 }
6397
6398 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6399                                                  enum amdgpu_interrupt_state state)
6400 {
6401         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6402                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6403 }
6404
6405 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6406                                                      int me, int pipe,
6407                                                      enum amdgpu_interrupt_state state)
6408 {
6409         /*
6410          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6411          * handles the setting of interrupts for this specific pipe. All other
6412          * pipes' interrupts are set by amdkfd.
6413          */
6414
6415         if (me == 1) {
6416                 switch (pipe) {
6417                 case 0:
6418                         break;
6419                 default:
6420                         DRM_DEBUG("invalid pipe %d\n", pipe);
6421                         return;
6422                 }
6423         } else {
6424                 DRM_DEBUG("invalid me %d\n", me);
6425                 return;
6426         }
6427
6428         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6429                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6430 }
6431
6432 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6433                                              struct amdgpu_irq_src *source,
6434                                              unsigned type,
6435                                              enum amdgpu_interrupt_state state)
6436 {
6437         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6438                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6439
6440         return 0;
6441 }
6442
6443 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6444                                               struct amdgpu_irq_src *source,
6445                                               unsigned type,
6446                                               enum amdgpu_interrupt_state state)
6447 {
6448         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6449                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6450
6451         return 0;
6452 }
6453
6454 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6455                                             struct amdgpu_irq_src *src,
6456                                             unsigned type,
6457                                             enum amdgpu_interrupt_state state)
6458 {
6459         switch (type) {
6460         case AMDGPU_CP_IRQ_GFX_EOP:
6461                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6462                 break;
6463         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6464                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6465                 break;
6466         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6467                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6468                 break;
6469         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6470                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6471                 break;
6472         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6473                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6474                 break;
6475         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6476                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6477                 break;
6478         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6479                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6480                 break;
6481         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6482                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6483                 break;
6484         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6485                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6486                 break;
6487         default:
6488                 break;
6489         }
6490         return 0;
6491 }
6492
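     /*
      * End-of-pipe interrupt handler: decode me/pipe/queue from the IV
      * ring_id and run fence processing on the matching gfx or compute ring.
      */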
6493 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6494                             struct amdgpu_irq_src *source,
6495                             struct amdgpu_iv_entry *entry)
6496 {
6497         int i;
6498         u8 me_id, pipe_id, queue_id;
6499         struct amdgpu_ring *ring;
6500
6501         DRM_DEBUG("IH: CP EOP\n");
6502         me_id = (entry->ring_id & 0x0c) >> 2;
6503         pipe_id = (entry->ring_id & 0x03) >> 0;
6504         queue_id = (entry->ring_id & 0x70) >> 4;
6505
6506         switch (me_id) {
6507         case 0:
6508                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6509                 break;
6510         case 1:
6511         case 2:
6512                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6513                         ring = &adev->gfx.compute_ring[i];
6514                         /* Per-queue interrupt is supported for MEC starting from VI.
6515                           * The interrupt can only be enabled/disabled per pipe instead of per queue.
6516                           */
6517                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6518                                 amdgpu_fence_process(ring);
6519                 }
6520                 break;
6521         }
6522         return 0;
6523 }
6524
6525 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6526                                  struct amdgpu_irq_src *source,
6527                                  struct amdgpu_iv_entry *entry)
6528 {
6529         DRM_ERROR("Illegal register access in command stream\n");
6530         schedule_work(&adev->reset_work);
6531         return 0;
6532 }
6533
6534 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6535                                   struct amdgpu_irq_src *source,
6536                                   struct amdgpu_iv_entry *entry)
6537 {
6538         DRM_ERROR("Illegal instruction in command stream\n");
6539         schedule_work(&adev->reset_work);
6540         return 0;
6541 }
6542
6543 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6544         .name = "gfx_v8_0",
6545         .early_init = gfx_v8_0_early_init,
6546         .late_init = gfx_v8_0_late_init,
6547         .sw_init = gfx_v8_0_sw_init,
6548         .sw_fini = gfx_v8_0_sw_fini,
6549         .hw_init = gfx_v8_0_hw_init,
6550         .hw_fini = gfx_v8_0_hw_fini,
6551         .suspend = gfx_v8_0_suspend,
6552         .resume = gfx_v8_0_resume,
6553         .is_idle = gfx_v8_0_is_idle,
6554         .wait_for_idle = gfx_v8_0_wait_for_idle,
6555         .check_soft_reset = gfx_v8_0_check_soft_reset,
6556         .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6557         .soft_reset = gfx_v8_0_soft_reset,
6558         .post_soft_reset = gfx_v8_0_post_soft_reset,
6559         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6560         .set_powergating_state = gfx_v8_0_set_powergating_state,
6561 };
6562
6563 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6564         .get_rptr = gfx_v8_0_ring_get_rptr,
6565         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6566         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6567         .parse_cs = NULL,
6568         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6569         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6570         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6571         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6572         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6573         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6574         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6575         .test_ring = gfx_v8_0_ring_test_ring,
6576         .test_ib = gfx_v8_0_ring_test_ib,
6577         .insert_nop = amdgpu_ring_insert_nop,
6578         .pad_ib = amdgpu_ring_generic_pad_ib,
6579         .emit_switch_buffer = gfx_v8_ring_emit_sb,
6580         .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6581         .get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_gfx,
6582         .get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_gfx,
6583 };
6584
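/* Ring callbacks used by the compute (MEC) rings; unlike the gfx ring
 * there is no switch-buffer or context-control emission.
 */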
6585 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6586         .get_rptr = gfx_v8_0_ring_get_rptr,
6587         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6588         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6589         .parse_cs = NULL,
6590         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6591         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6592         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6593         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6594         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6595         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6596         .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6597         .test_ring = gfx_v8_0_ring_test_ring,
6598         .test_ib = gfx_v8_0_ring_test_ib,
6599         .insert_nop = amdgpu_ring_insert_nop,
6600         .pad_ib = amdgpu_ring_generic_pad_ib,
6601         .get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_compute,
6602         .get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_compute,
6603 };
6604
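/* Point every gfx and compute ring at the ring callbacks defined above. */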
6605 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6606 {
6607         int i;
6608
6609         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6610                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6611
6612         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6613                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6614 }
6615
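/* Interrupt source callbacks: .set programs the interrupt enable state
 * in hardware, .process handles vectors delivered to that source.
 */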
6616 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6617         .set = gfx_v8_0_set_eop_interrupt_state,
6618         .process = gfx_v8_0_eop_irq,
6619 };
6620
6621 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6622         .set = gfx_v8_0_set_priv_reg_fault_state,
6623         .process = gfx_v8_0_priv_reg_irq,
6624 };
6625
6626 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6627         .set = gfx_v8_0_set_priv_inst_fault_state,
6628         .process = gfx_v8_0_priv_inst_irq,
6629 };
6630
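/* Register the interrupt sources owned by this block: one EOP (fence)
 * type per CP ring plus single-type privileged register and privileged
 * instruction fault sources.
 */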
6631 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6632 {
6633         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6634         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6635
6636         adev->gfx.priv_reg_irq.num_types = 1;
6637         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6638
6639         adev->gfx.priv_inst_irq.num_types = 1;
6640         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6641 }
6642
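/* Select the RLC callbacks per ASIC: Topaz and Carrizo/Stoney have
 * dedicated implementations, all other VI parts use the no-op set.
 */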
6643 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6644 {
6645         switch (adev->asic_type) {
6646         case CHIP_TOPAZ:
6647                 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6648                 break;
6649         case CHIP_STONEY:
6650         case CHIP_CARRIZO:
6651                 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6652                 break;
6653         default:
6654                 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6655                 break;
6656         }
6657 }
6658
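/* Read the total GDS size from the hardware and split the GDS memory,
 * GWS and OA resources between the gfx and compute (CS) partitions;
 * the split depends on whether 64KB of GDS memory is present.
 */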
6659 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6660 {
6661         /* init asic gds info */
6662         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6663         adev->gds.gws.total_size = 64;
6664         adev->gds.oa.total_size = 16;
6665
6666         if (adev->gds.mem.total_size == 64 * 1024) {
6667                 adev->gds.mem.gfx_partition_size = 4096;
6668                 adev->gds.mem.cs_partition_size = 4096;
6669
6670                 adev->gds.gws.gfx_partition_size = 4;
6671                 adev->gds.gws.cs_partition_size = 4;
6672
6673                 adev->gds.oa.gfx_partition_size = 4;
6674                 adev->gds.oa.cs_partition_size = 1;
6675         } else {
6676                 adev->gds.mem.gfx_partition_size = 1024;
6677                 adev->gds.mem.cs_partition_size = 1024;
6678
6679                 adev->gds.gws.gfx_partition_size = 16;
6680                 adev->gds.gws.cs_partition_size = 16;
6681
6682                 adev->gds.oa.gfx_partition_size = 4;
6683                 adev->gds.oa.cs_partition_size = 4;
6684         }
6685 }
6686
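/* Program the user-requested inactive CU bitmap for the currently
 * selected SE/SH; a zero bitmap leaves the register untouched.
 */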
6687 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6688                                                  u32 bitmap)
6689 {
6690         u32 data;
6691
6692         if (!bitmap)
6693                 return;
6694
6695         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6696         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6697
6698         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6699 }
6700
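/* Return the active CU bitmap for the currently selected SE/SH by
 * combining the hardware (CC) and user (GC_USER) inactive CU bits,
 * inverting them and masking to max_cu_per_sh.
 */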
6701 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6702 {
6703         u32 data, mask;
6704
6705         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
6706                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6707
6708         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6709
6710         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
6711 }
6712
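/* Walk every shader engine/array, apply any user CU disable masks, and
 * fill adev->gfx.cu_info with the per-SE/SH active CU bitmaps, the total
 * number of active CUs and the ao_cu_mask (always-on CUs, taken as the
 * first two active CUs of each SH).
 */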
6713 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6714 {
6715         int i, j, k, counter, active_cu_number = 0;
6716         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6717         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6718         unsigned disable_masks[4 * 2];
6719
6720         memset(cu_info, 0, sizeof(*cu_info));
6721
6722         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6723
6724         mutex_lock(&adev->grbm_idx_mutex);
6725         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6726                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6727                         mask = 1;
6728                         ao_bitmap = 0;
6729                         counter = 0;
6730                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6731                         if (i < 4 && j < 2)
6732                                 gfx_v8_0_set_user_cu_inactive_bitmap(
6733                                         adev, disable_masks[i * 2 + j]);
6734                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6735                         cu_info->bitmap[i][j] = bitmap;
6736
6737                         for (k = 0; k < 16; k++) {
6738                                 if (bitmap & mask) {
6739                                         if (counter < 2)
6740                                                 ao_bitmap |= mask;
6741                                         counter++;
6742                                 }
6743                                 mask <<= 1;
6744                         }
6745                         active_cu_number += counter;
6746                         ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6747                 }
6748         }
6749         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6750         mutex_unlock(&adev->grbm_idx_mutex);
6751
6752         cu_info->number = active_cu_number;
6753         cu_info->ao_cu_mask = ao_cu_mask;
6754 }