/* drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/kernel.h>
24 #include <linux/firmware.h>
25 #include <drm/drmP.h>
26 #include "amdgpu.h"
27 #include "amdgpu_gfx.h"
28 #include "vi.h"
29 #include "vi_structs.h"
30 #include "vid.h"
31 #include "amdgpu_ucode.h"
32 #include "amdgpu_atombios.h"
33 #include "atombios_i2c.h"
34 #include "clearstate_vi.h"
35
36 #include "gmc/gmc_8_2_d.h"
37 #include "gmc/gmc_8_2_sh_mask.h"
38
39 #include "oss/oss_3_0_d.h"
40 #include "oss/oss_3_0_sh_mask.h"
41
42 #include "bif/bif_5_0_d.h"
43 #include "bif/bif_5_0_sh_mask.h"
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #include "smu/smu_7_1_3_d.h"
53
/* Number of GFX rings exposed by this IP block. */
#define GFX8_NUM_GFX_RINGS     1
/* Bytes reserved per MEC compute queue for its HPD — NOTE(review): exact
 * consumer is in the MEC init code outside this chunk; confirm there. */
#define GFX8_MEC_HPD_SIZE 2048

/* Per-ASIC golden GB_ADDR_CONFIG values programmed during gfx init. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Field-building helpers: shift a value into its GB_TILE_MODE0 /
 * GB_MACROTILE_MODE0 bit-field position (shifts come from the gca headers). */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* RLC_CGTT_MGCG_OVERRIDE bit masks, defined locally (not provided by the
 * included register headers). */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD: set vs. clear command codes used with the BPM registers. */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM register addresses */
enum {
        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX
};

/* Mixed-case name kept to match RLC firmware's format naming convention. */
#define RLC_FormatDirectRegListLength        14
94
/*
 * Firmware images this IP block may request, declared so that module tooling
 * (modinfo, initramfs generators) can discover and bundle them.  The *_2.bin
 * Polaris variants are alternate firmware revisions.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
163
/*
 * Per-VMID register offsets for GDS base/size and the GWS/OA VMID registers,
 * indexed by VMID 0-15.  Initializer order follows the struct declaration:
 * { base, size, gws, oa } — struct amdgpu_gds_reg_offset is declared in a
 * header outside this file.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
183
/*
 * Tonga A11 golden register settings.  Flat triplets of
 * { register offset, mask, value } — layout assumed per amdgpu's
 * golden-settings convention; confirm at the register-programming call site
 * (outside this chunk).  Values are hardware-validated; do not edit.
 */
static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
203
/* Tonga common golden settings: { reg, mask, value } triplets (same layout
 * as the other golden tables in this file). */
static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
215
/*
 * Tonga clock-gating (MGCG/CGCG) init sequence: { reg, mask, value } triplets.
 * Order is significant — the GRBM_GFX_INDEX broadcast writes bracket the
 * per-block CGTT/CGTS programming.  Values are hardware-validated; do not edit.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        /* Per-CU CGTS programming for CU0-CU7. */
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
294
/* Polaris11 A11 golden settings: { reg, mask, value } triplets (same layout
 * as the other golden tables in this file).  Hardware-validated values. */
static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
315
/* Polaris11 common golden settings: { reg, mask, value } triplets. */
static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
325
/* Polaris10 A11 golden settings: { reg, mask, value } triplets (same layout
 * as the other golden tables in this file).  Hardware-validated values. */
static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
346
/* Polaris10 common golden settings: { reg, mask, value } triplets. */
static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
358
/* Fiji common golden settings: { reg, mask, value } triplets.  The repeated
 * GRBM_GFX_INDEX write re-broadcasts before SPI_CONFIG_CNTL_1. */
static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
372
/* Fiji A10 golden settings: { reg, mask, value } triplets (same layout as
 * the other golden tables in this file).  Hardware-validated values. */
static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
387
/*
 * Fiji clock-gating (MGCG/CGCG) init sequence: { reg, mask, value } triplets.
 * Same structure as tonga_mgcg_cgcg_init but without per-CU CGTS entries.
 * Order is significant; values are hardware-validated.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
426
/* Iceland (Topaz) A11 golden settings: { reg, mask, value } triplets (same
 * layout as the other golden tables in this file). */
static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
446
/* Iceland (Topaz) common golden settings: { reg, mask, value } triplets. */
static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
458
/*
 * Iceland (Topaz) clock-gating (MGCG/CGCG) init sequence:
 * { reg, mask, value } triplets.  Per-CU CGTS entries cover CU0-CU5 only
 * (fewer CUs than Tonga/Carrizo).  Order is significant; values are
 * hardware-validated.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
526
/* Carrizo (CZ) A11 golden settings: { reg, mask, value } triplets (same
 * layout as the other golden tables in this file). */
static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
542
/* Carrizo (CZ) common golden settings: { reg, mask, value } triplets. */
static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
554
/*
 * Carrizo (CZ) clock-gating (MGCG/CGCG) init sequence:
 * { reg, mask, value } triplets.  Per-CU CGTS entries cover CU0-CU7.
 * Order is significant; values are hardware-validated.
 */
static const u32 cz_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
633
/* Stoney "golden" register overrides.  Triples of
 * { register offset, mask, value }, applied by
 * amdgpu_program_register_sequence() in gfx_v8_0_init_golden_registers().
 */
static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
647
/* Stoney common GFX configuration (raster config, address config and
 * SPI CU resource reservations).  Same { reg, mask, value } triple
 * format, applied via amdgpu_program_register_sequence().
 */
static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
659
/* Stoney clockgating (MGCG/CGCG) init values.  Triples of
 * { reg, mask, value }, applied via amdgpu_program_register_sequence().
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
668
/* Forward declarations for functions defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
677
678 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
679 {
680         switch (adev->asic_type) {
681         case CHIP_TOPAZ:
682                 amdgpu_program_register_sequence(adev,
683                                                  iceland_mgcg_cgcg_init,
684                                                  (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
685                 amdgpu_program_register_sequence(adev,
686                                                  golden_settings_iceland_a11,
687                                                  (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
688                 amdgpu_program_register_sequence(adev,
689                                                  iceland_golden_common_all,
690                                                  (const u32)ARRAY_SIZE(iceland_golden_common_all));
691                 break;
692         case CHIP_FIJI:
693                 amdgpu_program_register_sequence(adev,
694                                                  fiji_mgcg_cgcg_init,
695                                                  (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
696                 amdgpu_program_register_sequence(adev,
697                                                  golden_settings_fiji_a10,
698                                                  (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
699                 amdgpu_program_register_sequence(adev,
700                                                  fiji_golden_common_all,
701                                                  (const u32)ARRAY_SIZE(fiji_golden_common_all));
702                 break;
703
704         case CHIP_TONGA:
705                 amdgpu_program_register_sequence(adev,
706                                                  tonga_mgcg_cgcg_init,
707                                                  (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
708                 amdgpu_program_register_sequence(adev,
709                                                  golden_settings_tonga_a11,
710                                                  (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
711                 amdgpu_program_register_sequence(adev,
712                                                  tonga_golden_common_all,
713                                                  (const u32)ARRAY_SIZE(tonga_golden_common_all));
714                 break;
715         case CHIP_POLARIS11:
716         case CHIP_POLARIS12:
717                 amdgpu_program_register_sequence(adev,
718                                                  golden_settings_polaris11_a11,
719                                                  (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
720                 amdgpu_program_register_sequence(adev,
721                                                  polaris11_golden_common_all,
722                                                  (const u32)ARRAY_SIZE(polaris11_golden_common_all));
723                 break;
724         case CHIP_POLARIS10:
725                 amdgpu_program_register_sequence(adev,
726                                                  golden_settings_polaris10_a11,
727                                                  (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
728                 amdgpu_program_register_sequence(adev,
729                                                  polaris10_golden_common_all,
730                                                  (const u32)ARRAY_SIZE(polaris10_golden_common_all));
731                 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
732                 if (adev->pdev->revision == 0xc7 &&
733                     ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
734                      (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
735                      (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
736                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
737                         amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
738                 }
739                 break;
740         case CHIP_CARRIZO:
741                 amdgpu_program_register_sequence(adev,
742                                                  cz_mgcg_cgcg_init,
743                                                  (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
744                 amdgpu_program_register_sequence(adev,
745                                                  cz_golden_settings_a11,
746                                                  (const u32)ARRAY_SIZE(cz_golden_settings_a11));
747                 amdgpu_program_register_sequence(adev,
748                                                  cz_golden_common_all,
749                                                  (const u32)ARRAY_SIZE(cz_golden_common_all));
750                 break;
751         case CHIP_STONEY:
752                 amdgpu_program_register_sequence(adev,
753                                                  stoney_mgcg_cgcg_init,
754                                                  (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
755                 amdgpu_program_register_sequence(adev,
756                                                  stoney_golden_settings_a11,
757                                                  (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
758                 amdgpu_program_register_sequence(adev,
759                                                  stoney_golden_common_all,
760                                                  (const u32)ARRAY_SIZE(stoney_golden_common_all));
761                 break;
762         default:
763                 break;
764         }
765 }
766
767 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
768 {
769         adev->gfx.scratch.num_reg = 8;
770         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
771         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
772 }
773
774 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
775 {
776         struct amdgpu_device *adev = ring->adev;
777         uint32_t scratch;
778         uint32_t tmp = 0;
779         unsigned i;
780         int r;
781
782         r = amdgpu_gfx_scratch_get(adev, &scratch);
783         if (r) {
784                 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
785                 return r;
786         }
787         WREG32(scratch, 0xCAFEDEAD);
788         r = amdgpu_ring_alloc(ring, 3);
789         if (r) {
790                 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
791                           ring->idx, r);
792                 amdgpu_gfx_scratch_free(adev, scratch);
793                 return r;
794         }
795         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
796         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
797         amdgpu_ring_write(ring, 0xDEADBEEF);
798         amdgpu_ring_commit(ring);
799
800         for (i = 0; i < adev->usec_timeout; i++) {
801                 tmp = RREG32(scratch);
802                 if (tmp == 0xDEADBEEF)
803                         break;
804                 DRM_UDELAY(1);
805         }
806         if (i < adev->usec_timeout) {
807                 DRM_INFO("ring test on %d succeeded in %d usecs\n",
808                          ring->idx, i);
809         } else {
810                 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
811                           ring->idx, scratch, tmp);
812                 r = -EINVAL;
813         }
814         amdgpu_gfx_scratch_free(adev, scratch);
815         return r;
816 }
817
818 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
819 {
820         struct amdgpu_device *adev = ring->adev;
821         struct amdgpu_ib ib;
822         struct dma_fence *f = NULL;
823         uint32_t scratch;
824         uint32_t tmp = 0;
825         long r;
826
827         r = amdgpu_gfx_scratch_get(adev, &scratch);
828         if (r) {
829                 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
830                 return r;
831         }
832         WREG32(scratch, 0xCAFEDEAD);
833         memset(&ib, 0, sizeof(ib));
834         r = amdgpu_ib_get(adev, NULL, 256, &ib);
835         if (r) {
836                 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
837                 goto err1;
838         }
839         ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
840         ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
841         ib.ptr[2] = 0xDEADBEEF;
842         ib.length_dw = 3;
843
844         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
845         if (r)
846                 goto err2;
847
848         r = dma_fence_wait_timeout(f, false, timeout);
849         if (r == 0) {
850                 DRM_ERROR("amdgpu: IB test timed out.\n");
851                 r = -ETIMEDOUT;
852                 goto err2;
853         } else if (r < 0) {
854                 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
855                 goto err2;
856         }
857         tmp = RREG32(scratch);
858         if (tmp == 0xDEADBEEF) {
859                 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
860                 r = 0;
861         } else {
862                 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
863                           scratch, tmp);
864                 r = -EINVAL;
865         }
866 err2:
867         amdgpu_ib_free(adev, &ib, NULL);
868         dma_fence_put(f);
869 err1:
870         amdgpu_gfx_scratch_free(adev, scratch);
871         return r;
872 }
873
874
875 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
876 {
877         release_firmware(adev->gfx.pfp_fw);
878         adev->gfx.pfp_fw = NULL;
879         release_firmware(adev->gfx.me_fw);
880         adev->gfx.me_fw = NULL;
881         release_firmware(adev->gfx.ce_fw);
882         adev->gfx.ce_fw = NULL;
883         release_firmware(adev->gfx.rlc_fw);
884         adev->gfx.rlc_fw = NULL;
885         release_firmware(adev->gfx.mec_fw);
886         adev->gfx.mec_fw = NULL;
887         if ((adev->asic_type != CHIP_STONEY) &&
888             (adev->asic_type != CHIP_TOPAZ))
889                 release_firmware(adev->gfx.mec2_fw);
890         adev->gfx.mec2_fw = NULL;
891
892         kfree(adev->gfx.rlc.register_list_format);
893 }
894
895 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
896 {
897         const char *chip_name;
898         char fw_name[30];
899         int err;
900         struct amdgpu_firmware_info *info = NULL;
901         const struct common_firmware_header *header = NULL;
902         const struct gfx_firmware_header_v1_0 *cp_hdr;
903         const struct rlc_firmware_header_v2_0 *rlc_hdr;
904         unsigned int *tmp = NULL, i;
905
906         DRM_DEBUG("\n");
907
908         switch (adev->asic_type) {
909         case CHIP_TOPAZ:
910                 chip_name = "topaz";
911                 break;
912         case CHIP_TONGA:
913                 chip_name = "tonga";
914                 break;
915         case CHIP_CARRIZO:
916                 chip_name = "carrizo";
917                 break;
918         case CHIP_FIJI:
919                 chip_name = "fiji";
920                 break;
921         case CHIP_POLARIS11:
922                 chip_name = "polaris11";
923                 break;
924         case CHIP_POLARIS10:
925                 chip_name = "polaris10";
926                 break;
927         case CHIP_POLARIS12:
928                 chip_name = "polaris12";
929                 break;
930         case CHIP_STONEY:
931                 chip_name = "stoney";
932                 break;
933         default:
934                 BUG();
935         }
936
937         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
938                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
939                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
940                 if (err == -ENOENT) {
941                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
942                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
943                 }
944         } else {
945                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
946                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
947         }
948         if (err)
949                 goto out;
950         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
951         if (err)
952                 goto out;
953         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
954         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
955         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
956
957         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
958                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
959                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
960                 if (err == -ENOENT) {
961                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
962                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
963                 }
964         } else {
965                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
966                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
967         }
968         if (err)
969                 goto out;
970         err = amdgpu_ucode_validate(adev->gfx.me_fw);
971         if (err)
972                 goto out;
973         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
974         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
975
976         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
977
978         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
979                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
980                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
981                 if (err == -ENOENT) {
982                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
983                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
984                 }
985         } else {
986                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
987                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
988         }
989         if (err)
990                 goto out;
991         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
992         if (err)
993                 goto out;
994         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
995         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
996         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
997
998         /*
999          * Support for MCBP/Virtualization in combination with chained IBs is
1000          * formal released on feature version #46
1001          */
1002         if (adev->gfx.ce_feature_version >= 46 &&
1003             adev->gfx.pfp_feature_version >= 46) {
1004                 adev->virt.chained_ib_support = true;
1005                 DRM_INFO("Chained IB support enabled!\n");
1006         } else
1007                 adev->virt.chained_ib_support = false;
1008
1009         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1010         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1011         if (err)
1012                 goto out;
1013         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1014         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1015         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1016         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1017
1018         adev->gfx.rlc.save_and_restore_offset =
1019                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1020         adev->gfx.rlc.clear_state_descriptor_offset =
1021                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1022         adev->gfx.rlc.avail_scratch_ram_locations =
1023                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1024         adev->gfx.rlc.reg_restore_list_size =
1025                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1026         adev->gfx.rlc.reg_list_format_start =
1027                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1028         adev->gfx.rlc.reg_list_format_separate_start =
1029                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1030         adev->gfx.rlc.starting_offsets_start =
1031                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1032         adev->gfx.rlc.reg_list_format_size_bytes =
1033                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1034         adev->gfx.rlc.reg_list_size_bytes =
1035                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1036
1037         adev->gfx.rlc.register_list_format =
1038                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1039                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1040
1041         if (!adev->gfx.rlc.register_list_format) {
1042                 err = -ENOMEM;
1043                 goto out;
1044         }
1045
1046         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1047                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1048         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1049                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1050
1051         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1052
1053         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1054                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1055         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1056                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1057
1058         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1059                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1060                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1061                 if (err == -ENOENT) {
1062                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1063                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1064                 }
1065         } else {
1066                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1067                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1068         }
1069         if (err)
1070                 goto out;
1071         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1072         if (err)
1073                 goto out;
1074         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1075         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1076         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1077
1078         if ((adev->asic_type != CHIP_STONEY) &&
1079             (adev->asic_type != CHIP_TOPAZ)) {
1080                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1081                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1082                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1083                         if (err == -ENOENT) {
1084                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1085                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1086                         }
1087                 } else {
1088                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1089                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1090                 }
1091                 if (!err) {
1092                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1093                         if (err)
1094                                 goto out;
1095                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1096                                 adev->gfx.mec2_fw->data;
1097                         adev->gfx.mec2_fw_version =
1098                                 le32_to_cpu(cp_hdr->header.ucode_version);
1099                         adev->gfx.mec2_feature_version =
1100                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1101                 } else {
1102                         err = 0;
1103                         adev->gfx.mec2_fw = NULL;
1104                 }
1105         }
1106
1107         if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1108                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1109                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1110                 info->fw = adev->gfx.pfp_fw;
1111                 header = (const struct common_firmware_header *)info->fw->data;
1112                 adev->firmware.fw_size +=
1113                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1114
1115                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1116                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1117                 info->fw = adev->gfx.me_fw;
1118                 header = (const struct common_firmware_header *)info->fw->data;
1119                 adev->firmware.fw_size +=
1120                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1121
1122                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1123                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1124                 info->fw = adev->gfx.ce_fw;
1125                 header = (const struct common_firmware_header *)info->fw->data;
1126                 adev->firmware.fw_size +=
1127                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1128
1129                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1130                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1131                 info->fw = adev->gfx.rlc_fw;
1132                 header = (const struct common_firmware_header *)info->fw->data;
1133                 adev->firmware.fw_size +=
1134                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1135
1136                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1137                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1138                 info->fw = adev->gfx.mec_fw;
1139                 header = (const struct common_firmware_header *)info->fw->data;
1140                 adev->firmware.fw_size +=
1141                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1142
1143                 /* we need account JT in */
1144                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1145                 adev->firmware.fw_size +=
1146                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1147
1148                 if (amdgpu_sriov_vf(adev)) {
1149                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1150                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1151                         info->fw = adev->gfx.mec_fw;
1152                         adev->firmware.fw_size +=
1153                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1154                 }
1155
1156                 if (adev->gfx.mec2_fw) {
1157                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1158                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1159                         info->fw = adev->gfx.mec2_fw;
1160                         header = (const struct common_firmware_header *)info->fw->data;
1161                         adev->firmware.fw_size +=
1162                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1163                 }
1164
1165         }
1166
1167 out:
1168         if (err) {
1169                 dev_err(adev->dev,
1170                         "gfx8: Failed to load firmware \"%s\"\n",
1171                         fw_name);
1172                 release_firmware(adev->gfx.pfp_fw);
1173                 adev->gfx.pfp_fw = NULL;
1174                 release_firmware(adev->gfx.me_fw);
1175                 adev->gfx.me_fw = NULL;
1176                 release_firmware(adev->gfx.ce_fw);
1177                 adev->gfx.ce_fw = NULL;
1178                 release_firmware(adev->gfx.rlc_fw);
1179                 adev->gfx.rlc_fw = NULL;
1180                 release_firmware(adev->gfx.mec_fw);
1181                 adev->gfx.mec_fw = NULL;
1182                 release_firmware(adev->gfx.mec2_fw);
1183                 adev->gfx.mec2_fw = NULL;
1184         }
1185         return err;
1186 }
1187
/*
 * gfx_v8_0_get_csb_buffer - build the clear-state buffer (CSB)
 * @adev: amdgpu device pointer
 * @buffer: destination buffer for the PM4 packet stream (little-endian)
 *
 * Emits the PM4 packet sequence that initializes the GFX context state:
 * PREAMBLE begin-clear-state, CONTEXT_CONTROL, every SECT_CONTEXT extent
 * from the RLC clear-state tables, the raster config pair, PREAMBLE
 * end-clear-state and a final CLEAR_STATE packet.  Packet order matters;
 * no-op if either the cs_data tables or the buffer is missing.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
                                    volatile u32 *buffer)
{
        u32 count = 0, i;
        const struct cs_section_def *sect = NULL;
        const struct cs_extent_def *ext = NULL;

        if (adev->gfx.rlc.cs_data == NULL)
                return;
        if (buffer == NULL)
                return;

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        buffer[count++] = cpu_to_le32(0x80000000);
        buffer[count++] = cpu_to_le32(0x80000000);

        /* Copy every context-register extent from the clear-state tables;
         * only SECT_CONTEXT sections are expected here -- anything else
         * aborts the build. */
        for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
                for (ext = sect->section; ext->extent != NULL; ++ext) {
                        if (sect->id == SECT_CONTEXT) {
                                buffer[count++] =
                                        cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
                                buffer[count++] = cpu_to_le32(ext->reg_index -
                                                PACKET3_SET_CONTEXT_REG_START);
                                for (i = 0; i < ext->reg_count; i++)
                                        buffer[count++] = cpu_to_le32(ext->extent[i]);
                        } else {
                                return;
                        }
                }
        }

        /* Program the raster configuration captured for SE0/SH0. */
        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
                        PACKET3_SET_CONTEXT_REG_START);
        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
        buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
        buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

        buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
        buffer[count++] = cpu_to_le32(0);
}
1234
1235 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1236 {
1237         const __le32 *fw_data;
1238         volatile u32 *dst_ptr;
1239         int me, i, max_me = 4;
1240         u32 bo_offset = 0;
1241         u32 table_offset, table_size;
1242
1243         if (adev->asic_type == CHIP_CARRIZO)
1244                 max_me = 5;
1245
1246         /* write the cp table buffer */
1247         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1248         for (me = 0; me < max_me; me++) {
1249                 if (me == 0) {
1250                         const struct gfx_firmware_header_v1_0 *hdr =
1251                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1252                         fw_data = (const __le32 *)
1253                                 (adev->gfx.ce_fw->data +
1254                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1255                         table_offset = le32_to_cpu(hdr->jt_offset);
1256                         table_size = le32_to_cpu(hdr->jt_size);
1257                 } else if (me == 1) {
1258                         const struct gfx_firmware_header_v1_0 *hdr =
1259                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1260                         fw_data = (const __le32 *)
1261                                 (adev->gfx.pfp_fw->data +
1262                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1263                         table_offset = le32_to_cpu(hdr->jt_offset);
1264                         table_size = le32_to_cpu(hdr->jt_size);
1265                 } else if (me == 2) {
1266                         const struct gfx_firmware_header_v1_0 *hdr =
1267                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1268                         fw_data = (const __le32 *)
1269                                 (adev->gfx.me_fw->data +
1270                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1271                         table_offset = le32_to_cpu(hdr->jt_offset);
1272                         table_size = le32_to_cpu(hdr->jt_size);
1273                 } else if (me == 3) {
1274                         const struct gfx_firmware_header_v1_0 *hdr =
1275                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1276                         fw_data = (const __le32 *)
1277                                 (adev->gfx.mec_fw->data +
1278                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1279                         table_offset = le32_to_cpu(hdr->jt_offset);
1280                         table_size = le32_to_cpu(hdr->jt_size);
1281                 } else  if (me == 4) {
1282                         const struct gfx_firmware_header_v1_0 *hdr =
1283                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1284                         fw_data = (const __le32 *)
1285                                 (adev->gfx.mec2_fw->data +
1286                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1287                         table_offset = le32_to_cpu(hdr->jt_offset);
1288                         table_size = le32_to_cpu(hdr->jt_size);
1289                 }
1290
1291                 for (i = 0; i < table_size; i ++) {
1292                         dst_ptr[bo_offset + i] =
1293                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1294                 }
1295
1296                 bo_offset += table_size;
1297         }
1298 }
1299
/*
 * Free the RLC BOs (clear-state and CP jump table) created by
 * gfx_v8_0_rlc_init().  Safe to call on a partially initialized state:
 * rlc_init's own error path invokes this before the cp_table BO exists.
 */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
1305
1306 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1307 {
1308         volatile u32 *dst_ptr;
1309         u32 dws;
1310         const struct cs_section_def *cs_data;
1311         int r;
1312
1313         adev->gfx.rlc.cs_data = vi_cs_data;
1314
1315         cs_data = adev->gfx.rlc.cs_data;
1316
1317         if (cs_data) {
1318                 /* clear state block */
1319                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1320
1321                 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1322                                               AMDGPU_GEM_DOMAIN_VRAM,
1323                                               &adev->gfx.rlc.clear_state_obj,
1324                                               &adev->gfx.rlc.clear_state_gpu_addr,
1325                                               (void **)&adev->gfx.rlc.cs_ptr);
1326                 if (r) {
1327                         dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1328                         gfx_v8_0_rlc_fini(adev);
1329                         return r;
1330                 }
1331
1332                 /* set up the cs buffer */
1333                 dst_ptr = adev->gfx.rlc.cs_ptr;
1334                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1335                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1336                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1337         }
1338
1339         if ((adev->asic_type == CHIP_CARRIZO) ||
1340             (adev->asic_type == CHIP_STONEY)) {
1341                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1342                 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1343                                               PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1344                                               &adev->gfx.rlc.cp_table_obj,
1345                                               &adev->gfx.rlc.cp_table_gpu_addr,
1346                                               (void **)&adev->gfx.rlc.cp_table_ptr);
1347                 if (r) {
1348                         dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1349                         return r;
1350                 }
1351
1352                 cz_init_cp_jump_table(adev);
1353
1354                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1355                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1356         }
1357
1358         return 0;
1359 }
1360
/* Free the MEC HPD/EOP buffer object created by gfx_v8_0_mec_init(). */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1365
1366 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1367 {
1368         int r;
1369         u32 *hpd;
1370         size_t mec_hpd_size;
1371
1372         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1373
1374         /* take ownership of the relevant compute queues */
1375         amdgpu_gfx_compute_queue_acquire(adev);
1376
1377         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1378
1379         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1380                                       AMDGPU_GEM_DOMAIN_GTT,
1381                                       &adev->gfx.mec.hpd_eop_obj,
1382                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1383                                       (void **)&hpd);
1384         if (r) {
1385                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1386                 return r;
1387         }
1388
1389         memset(hpd, 0, mec_hpd_size);
1390
1391         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1392         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1393
1394         return 0;
1395 }
1396
/*
 * GCN3 compute shader binary used by gfx_v8_0_do_edc_gpr_workarounds()
 * to initialize the VGPRs before enabling EDC.  Presumably a sequence
 * of v_mov writes ending in s_barrier/s_endpgm — TODO confirm against
 * the GCN3 ISA if this table is ever regenerated.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1433
/*
 * GCN3 compute shader binary used by gfx_v8_0_do_edc_gpr_workarounds()
 * to initialize the SGPRs (dispatched twice, once per SE half — see
 * sgpr1_init_regs / sgpr2_init_regs).
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1458
/*
 * Register/value pairs programmed via SET_SH_REG before dispatching
 * vgpr_init_compute_shader in gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1478
/*
 * Register/value pairs for the first SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds() (thread mgmt mask 0x0f — lower CUs).
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1498
/*
 * Register/value pairs for the second SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds() (thread mgmt mask 0xf0 — upper CUs).
 * Identical to sgpr1_init_regs except for the thread-management mask.
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1518
/*
 * EDC SEC/DED error-counter registers; read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1547
/*
 * GPR EDC workaround (Carrizo only): build one IB that dispatches the
 * VGPR-init shader and the SGPR-init shader (twice, with different CU
 * masks), wait for it, then enable DED/PROP_FED EDC modes and clear the
 * SEC/DED counters by reading them back.
 * Returns 0 on success (or when skipped) or a negative error code.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save the current EDC mode and disable EDC while the shaders run.
	 * NOTE(review): on the error paths below GB_EDC_MODE is left at 0
	 * rather than restored from 'tmp' — confirm this is intended. */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per dispatch: 3 dwords per SET_SH_REG reg/value pair, plus
	 * 4 (PGM_LO/HI) + 5 (DISPATCH_DIRECT) + 2 (CS partial flush) */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 — same shader, different CU mask (see sgpr2_init_regs) */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with DED halt and FED propagation on top of the
	 * originally saved mode bits */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1710
/*
 * Derive the per-ASIC gfx configuration (shader engines, tile pipes,
 * CUs, FIFO sizes) and compute GB_ADDR_CONFIG, fixing up the ROW_SIZE
 * field from the memory-controller row size (which on APUs is derived
 * from the fused DIMM address mappings).
 * Returns 0 on success, or the error from amdgpu_atombios_get_gfx_info()
 * on Polaris parts.
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/* Polaris reads SE/CU topology from the vbios */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;
		adev->gfx.config.max_cu_per_sh = 3;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* NOTE(review): mc_shared_chmap is read but never used below —
	 * candidate for removal (keeping the read for now in case the
	 * register access matters). */
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
1913
1914 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1915                                         int mec, int pipe, int queue)
1916 {
1917         int r;
1918         unsigned irq_type;
1919         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1920
1921         ring = &adev->gfx.compute_ring[ring_id];
1922
1923         /* mec0 is me1 */
1924         ring->me = mec + 1;
1925         ring->pipe = pipe;
1926         ring->queue = queue;
1927
1928         ring->ring_obj = NULL;
1929         ring->use_doorbell = true;
1930         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1931         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1932                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1933         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1934
1935         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1936                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1937                 + ring->pipe;
1938
1939         /* type-2 packets are deprecated on MEC, use type-3 instead */
1940         r = amdgpu_ring_init(adev, ring, 1024,
1941                         &adev->gfx.eop_irq, irq_type);
1942         if (r)
1943                 return r;
1944
1945
1946         return 0;
1947 }
1948
/**
 * gfx_v8_0_sw_init - software-side GFX 8 initialization
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Registers the GFX interrupt sources, loads microcode, allocates the
 * RLC/MEC/KIQ buffer objects, creates the gfx and compute rings and
 * reserves the GDS/GWS/OA partitions.  The order of these steps matters:
 * rings depend on the MEC EOP buffer, KIQ/MQD setup depends on the rings.
 *
 * Returns 0 on success or a negative error code from the first failing step.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* number of MECs depends on the ASIC generation */
	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_TONGA:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_POLARIS10:
	case CHIP_CARRIZO:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* KIQ event (interrupt source id 178) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event (interrupt source id 181) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg (interrupt source id 184) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst (interrupt source id 185) */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* MEC init must precede compute ring setup: it allocates the
	 * hpd_eop buffer the rings point into.
	 */
	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}


	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				/* skip queues not exposed by the queue policy */
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2099
/**
 * gfx_v8_0_sw_fini - software-side GFX 8 teardown
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Releases everything gfx_v8_0_sw_init() created, in roughly reverse
 * order: GDS/GWS/OA BOs, gfx and compute rings, MQDs, KIQ, CSA, MEC,
 * RLC buffers and the loaded microcode.
 *
 * Always returns 0.
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);
	amdgpu_bo_free_kernel(&adev->virt.csa_obj, &adev->virt.csa_vmid0_addr, NULL);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
				&adev->gfx.rlc.clear_state_gpu_addr,
				(void **)&adev->gfx.rlc.cs_ptr);
	/* the RLC cp_table BO only exists on APUs (Carrizo/Stoney) */
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				&adev->gfx.rlc.cp_table_gpu_addr,
				(void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2134
2135 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2136 {
2137         uint32_t *modearray, *mod2array;
2138         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2139         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2140         u32 reg_offset;
2141
2142         modearray = adev->gfx.config.tile_mode_array;
2143         mod2array = adev->gfx.config.macrotile_mode_array;
2144
2145         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2146                 modearray[reg_offset] = 0;
2147
2148         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2149                 mod2array[reg_offset] = 0;
2150
2151         switch (adev->asic_type) {
2152         case CHIP_TOPAZ:
2153                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2154                                 PIPE_CONFIG(ADDR_SURF_P2) |
2155                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2156                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2157                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2158                                 PIPE_CONFIG(ADDR_SURF_P2) |
2159                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2160                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2161                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2162                                 PIPE_CONFIG(ADDR_SURF_P2) |
2163                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2164                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2165                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2166                                 PIPE_CONFIG(ADDR_SURF_P2) |
2167                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2168                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2169                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2170                                 PIPE_CONFIG(ADDR_SURF_P2) |
2171                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2172                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2173                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2174                                 PIPE_CONFIG(ADDR_SURF_P2) |
2175                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2176                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2177                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2178                                 PIPE_CONFIG(ADDR_SURF_P2) |
2179                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2180                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2181                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2182                                 PIPE_CONFIG(ADDR_SURF_P2));
2183                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2184                                 PIPE_CONFIG(ADDR_SURF_P2) |
2185                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2186                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2187                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2188                                  PIPE_CONFIG(ADDR_SURF_P2) |
2189                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2190                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2191                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2192                                  PIPE_CONFIG(ADDR_SURF_P2) |
2193                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2194                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2195                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2196                                  PIPE_CONFIG(ADDR_SURF_P2) |
2197                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2198                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2199                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2200                                  PIPE_CONFIG(ADDR_SURF_P2) |
2201                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2202                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2203                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2204                                  PIPE_CONFIG(ADDR_SURF_P2) |
2205                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2206                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2207                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2208                                  PIPE_CONFIG(ADDR_SURF_P2) |
2209                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2210                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2211                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2212                                  PIPE_CONFIG(ADDR_SURF_P2) |
2213                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2214                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2215                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2216                                  PIPE_CONFIG(ADDR_SURF_P2) |
2217                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2218                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2219                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2220                                  PIPE_CONFIG(ADDR_SURF_P2) |
2221                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2222                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2223                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2224                                  PIPE_CONFIG(ADDR_SURF_P2) |
2225                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2226                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2227                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2228                                  PIPE_CONFIG(ADDR_SURF_P2) |
2229                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2230                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2231                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2232                                  PIPE_CONFIG(ADDR_SURF_P2) |
2233                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2234                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2235                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2236                                  PIPE_CONFIG(ADDR_SURF_P2) |
2237                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2238                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2239                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2240                                  PIPE_CONFIG(ADDR_SURF_P2) |
2241                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2242                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2243                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2244                                  PIPE_CONFIG(ADDR_SURF_P2) |
2245                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2246                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2247                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2248                                  PIPE_CONFIG(ADDR_SURF_P2) |
2249                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2250                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2251                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2252                                  PIPE_CONFIG(ADDR_SURF_P2) |
2253                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2254                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2255
2256                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2257                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2258                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2259                                 NUM_BANKS(ADDR_SURF_8_BANK));
2260                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2261                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2262                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2263                                 NUM_BANKS(ADDR_SURF_8_BANK));
2264                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2265                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2266                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2267                                 NUM_BANKS(ADDR_SURF_8_BANK));
2268                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2269                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2270                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2271                                 NUM_BANKS(ADDR_SURF_8_BANK));
2272                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2273                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2274                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2275                                 NUM_BANKS(ADDR_SURF_8_BANK));
2276                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2277                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2278                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2279                                 NUM_BANKS(ADDR_SURF_8_BANK));
2280                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2281                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2282                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2283                                 NUM_BANKS(ADDR_SURF_8_BANK));
2284                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2285                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2286                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2287                                 NUM_BANKS(ADDR_SURF_16_BANK));
2288                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2289                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2290                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2291                                 NUM_BANKS(ADDR_SURF_16_BANK));
2292                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2293                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2294                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2295                                  NUM_BANKS(ADDR_SURF_16_BANK));
2296                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2297                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2298                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2299                                  NUM_BANKS(ADDR_SURF_16_BANK));
2300                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2301                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2302                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2303                                  NUM_BANKS(ADDR_SURF_16_BANK));
2304                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2305                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2306                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2307                                  NUM_BANKS(ADDR_SURF_16_BANK));
2308                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2309                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2310                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2311                                  NUM_BANKS(ADDR_SURF_8_BANK));
2312
2313                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2314                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2315                             reg_offset != 23)
2316                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2317
2318                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2319                         if (reg_offset != 7)
2320                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2321
2322                 break;
2323         case CHIP_FIJI:
2324                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2325                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2326                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2327                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2328                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2329                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2330                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2331                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2332                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2333                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2335                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2336                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2338                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2339                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2340                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2341                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2343                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2344                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2345                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2347                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2348                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2349                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2350                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2351                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2352                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2353                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2354                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2355                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2356                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2357                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2358                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2359                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2361                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2362                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2363                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2365                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2366                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2367                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2369                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2370                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2371                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2372                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2373                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2374                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2375                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2377                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2378                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2380                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2381                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2382                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2383                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2384                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2385                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2386                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2387                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2388                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2389                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2390                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2391                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2392                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2393                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2394                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2395                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2397                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2398                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2399                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2401                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2402                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2403                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2405                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2406                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2407                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2408                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2409                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2410                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2411                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2413                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2414                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2415                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2416                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2417                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2418                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2419                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2421                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2422                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2423                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2425                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2426                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2427                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2428                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2429                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2430                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2431                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2433                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2435                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2436                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2437                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2439                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2440                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2441                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2442                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2443                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2444                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2445                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2446
2447                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2448                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2449                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2450                                 NUM_BANKS(ADDR_SURF_8_BANK));
2451                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2452                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2453                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2454                                 NUM_BANKS(ADDR_SURF_8_BANK));
2455                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2456                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2457                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2458                                 NUM_BANKS(ADDR_SURF_8_BANK));
2459                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2460                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2461                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2462                                 NUM_BANKS(ADDR_SURF_8_BANK));
2463                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2464                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2465                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2466                                 NUM_BANKS(ADDR_SURF_8_BANK));
2467                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2468                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2469                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2470                                 NUM_BANKS(ADDR_SURF_8_BANK));
2471                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2472                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2473                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2474                                 NUM_BANKS(ADDR_SURF_8_BANK));
2475                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2476                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2477                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2478                                 NUM_BANKS(ADDR_SURF_8_BANK));
2479                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2480                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2481                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2482                                 NUM_BANKS(ADDR_SURF_8_BANK));
2483                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2484                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2485                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2486                                  NUM_BANKS(ADDR_SURF_8_BANK));
2487                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2488                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2489                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2490                                  NUM_BANKS(ADDR_SURF_8_BANK));
2491                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2492                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2493                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2494                                  NUM_BANKS(ADDR_SURF_8_BANK));
2495                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2496                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2497                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2498                                  NUM_BANKS(ADDR_SURF_8_BANK));
2499                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2500                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2501                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2502                                  NUM_BANKS(ADDR_SURF_4_BANK));
2503
2504                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2505                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2506
2507                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2508                         if (reg_offset != 7)
2509                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2510
2511                 break;
	case CHIP_TONGA:
		/*
		 * GB_TILE_MODE0..30 programming for Tonga.  Non-PRT entries
		 * use the 8-pipe ADDR_SURF_P8_32x32_16x16 pipe config; the
		 * second PRT variant in each group (modes 7, 12, 17, 23, 30)
		 * drops to ADDR_SURF_P4_16x16.
		 *
		 * Table layout (as written below):
		 *   0-7   depth micro tiling, tile split 64B..2KB
		 *   8     linear aligned
		 *   9-12  display micro tiling
		 *   13-17 thin (texture) micro tiling
		 *   18-26 thick/xthick micro tiling (3D/volume surfaces)
		 *   27-30 rotated micro tiling
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE0..14: bank width/height, macro tile
		 * aspect ratio and bank count per macro tile mode.  Index 7
		 * is never initialized here and the write loop below skips
		 * it.  NOTE(review): presumably that register is reserved or
		 * unused on this family — confirm against the VI register
		 * spec before changing the skip.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				 NUM_BANKS(ADDR_SURF_4_BANK));

		/* Flush the computed tables into the hardware registers. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7) /* index 7 left unprogrammed (see above) */
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/*
		 * GB_TILE_MODE0..30 programming for Polaris 11/12.  Same
		 * table layout as the other VI parts, but every entry uses
		 * the 4-pipe ADDR_SURF_P4_16x16 pipe config (smaller parts
		 * than Tonga/Polaris10):
		 *   0-7   depth micro tiling, tile split 64B..2KB
		 *   8     linear aligned
		 *   9-12  display micro tiling
		 *   13-17 thin (texture) micro tiling
		 *   18-26 thick/xthick micro tiling (3D/volume surfaces)
		 *   27-30 rotated micro tiling
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * GB_MACROTILE_MODE0..14 for Polaris 11/12.  Index 7 is never
		 * initialized and the write loop below skips it.  NOTE(review):
		 * presumably reserved/unused on this family — confirm against
		 * the VI register spec before changing the skip.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Flush the computed tables into the hardware registers. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7) /* index 7 left unprogrammed (see above) */
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
2904         case CHIP_POLARIS10:
2905                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2908                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2909                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2911                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2912                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2913                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2914                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2916                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2917                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2919                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2920                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2921                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2922                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2924                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2925                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2926                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2928                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2929                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2931                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2932                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2933                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2934                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2935                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2936                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2937                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2938                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2939                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2940                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2941                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2943                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2944                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2945                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2946                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2947                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2948                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2949                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2950                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2951                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2953                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2954                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2955                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2956                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2957                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2958                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2960                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2961                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2962                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2963                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2964                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2965                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2966                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2967                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2968                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2969                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2970                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2971                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2972                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2973                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2974                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2975                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2976                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2977                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2978                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2979                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2980                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2981                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2982                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2983                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2984                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2985                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2986                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2987                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2988                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2989                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2990                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2991                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2992                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2993                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2994                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2995                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2996                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2997                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2998                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2999                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3000                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3001                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3002                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3003                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3004                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3005                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3006                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3007                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3008                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3009                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3010                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3011                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3012                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3013                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3014                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3015                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3016                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3017                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3018                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3019                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3020                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3021                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3022                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3023                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3024                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3025                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3026                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3027
3028                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3029                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3030                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3031                                 NUM_BANKS(ADDR_SURF_16_BANK));
3032
3033                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3034                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3035                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3036                                 NUM_BANKS(ADDR_SURF_16_BANK));
3037
3038                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3039                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3040                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3041                                 NUM_BANKS(ADDR_SURF_16_BANK));
3042
3043                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3044                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3045                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3046                                 NUM_BANKS(ADDR_SURF_16_BANK));
3047
3048                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3049                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3050                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3051                                 NUM_BANKS(ADDR_SURF_16_BANK));
3052
3053                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3054                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3055                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3056                                 NUM_BANKS(ADDR_SURF_16_BANK));
3057
3058                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3059                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3060                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3061                                 NUM_BANKS(ADDR_SURF_16_BANK));
3062
3063                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3064                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3065                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3066                                 NUM_BANKS(ADDR_SURF_16_BANK));
3067
3068                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3069                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3070                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3071                                 NUM_BANKS(ADDR_SURF_16_BANK));
3072
3073                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3074                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3075                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3076                                 NUM_BANKS(ADDR_SURF_16_BANK));
3077
3078                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3079                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3080                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3081                                 NUM_BANKS(ADDR_SURF_16_BANK));
3082
3083                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3084                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3085                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3086                                 NUM_BANKS(ADDR_SURF_8_BANK));
3087
3088                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3089                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3090                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3091                                 NUM_BANKS(ADDR_SURF_4_BANK));
3092
3093                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3095                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3096                                 NUM_BANKS(ADDR_SURF_4_BANK));
3097
3098                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3099                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3100
3101                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3102                         if (reg_offset != 7)
3103                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3104
3105                 break;
3106         case CHIP_STONEY:
3107                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3108                                 PIPE_CONFIG(ADDR_SURF_P2) |
3109                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3110                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3111                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3112                                 PIPE_CONFIG(ADDR_SURF_P2) |
3113                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3114                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3115                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3116                                 PIPE_CONFIG(ADDR_SURF_P2) |
3117                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3118                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3119                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3120                                 PIPE_CONFIG(ADDR_SURF_P2) |
3121                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3122                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3123                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3124                                 PIPE_CONFIG(ADDR_SURF_P2) |
3125                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3126                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3127                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3128                                 PIPE_CONFIG(ADDR_SURF_P2) |
3129                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3130                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3131                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3132                                 PIPE_CONFIG(ADDR_SURF_P2) |
3133                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3134                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3135                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3136                                 PIPE_CONFIG(ADDR_SURF_P2));
3137                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3138                                 PIPE_CONFIG(ADDR_SURF_P2) |
3139                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3140                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3141                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3142                                  PIPE_CONFIG(ADDR_SURF_P2) |
3143                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3144                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3145                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3146                                  PIPE_CONFIG(ADDR_SURF_P2) |
3147                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3148                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3149                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3150                                  PIPE_CONFIG(ADDR_SURF_P2) |
3151                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3152                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3153                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3154                                  PIPE_CONFIG(ADDR_SURF_P2) |
3155                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3156                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3157                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3158                                  PIPE_CONFIG(ADDR_SURF_P2) |
3159                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3160                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3161                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3162                                  PIPE_CONFIG(ADDR_SURF_P2) |
3163                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3164                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3165                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3166                                  PIPE_CONFIG(ADDR_SURF_P2) |
3167                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3168                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3169                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3170                                  PIPE_CONFIG(ADDR_SURF_P2) |
3171                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3172                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3173                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3174                                  PIPE_CONFIG(ADDR_SURF_P2) |
3175                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3176                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3177                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3178                                  PIPE_CONFIG(ADDR_SURF_P2) |
3179                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3180                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3181                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3182                                  PIPE_CONFIG(ADDR_SURF_P2) |
3183                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3184                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3185                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3186                                  PIPE_CONFIG(ADDR_SURF_P2) |
3187                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3188                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3189                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3190                                  PIPE_CONFIG(ADDR_SURF_P2) |
3191                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3192                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3193                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3194                                  PIPE_CONFIG(ADDR_SURF_P2) |
3195                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3196                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3197                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3198                                  PIPE_CONFIG(ADDR_SURF_P2) |
3199                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3200                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3201                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3202                                  PIPE_CONFIG(ADDR_SURF_P2) |
3203                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3204                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3205                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3206                                  PIPE_CONFIG(ADDR_SURF_P2) |
3207                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3208                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3209
3210                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3211                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3212                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3213                                 NUM_BANKS(ADDR_SURF_8_BANK));
3214                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3215                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3216                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3217                                 NUM_BANKS(ADDR_SURF_8_BANK));
3218                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3219                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3220                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3221                                 NUM_BANKS(ADDR_SURF_8_BANK));
3222                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3223                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3224                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3225                                 NUM_BANKS(ADDR_SURF_8_BANK));
3226                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3227                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3228                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3229                                 NUM_BANKS(ADDR_SURF_8_BANK));
3230                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3231                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3232                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3233                                 NUM_BANKS(ADDR_SURF_8_BANK));
3234                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3235                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3236                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3237                                 NUM_BANKS(ADDR_SURF_8_BANK));
3238                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3239                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3240                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3241                                 NUM_BANKS(ADDR_SURF_16_BANK));
3242                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3243                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3244                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3245                                 NUM_BANKS(ADDR_SURF_16_BANK));
3246                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3247                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3248                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3249                                  NUM_BANKS(ADDR_SURF_16_BANK));
3250                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3251                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3252                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3253                                  NUM_BANKS(ADDR_SURF_16_BANK));
3254                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3255                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3256                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3257                                  NUM_BANKS(ADDR_SURF_16_BANK));
3258                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3259                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3260                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3261                                  NUM_BANKS(ADDR_SURF_16_BANK));
3262                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3263                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3264                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3265                                  NUM_BANKS(ADDR_SURF_8_BANK));
3266
3267                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3268                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3269                             reg_offset != 23)
3270                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3271
3272                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3273                         if (reg_offset != 7)
3274                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3275
3276                 break;
3277         default:
3278                 dev_warn(adev->dev,
3279                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3280                          adev->asic_type);
3281
3282         case CHIP_CARRIZO:
3283                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3284                                 PIPE_CONFIG(ADDR_SURF_P2) |
3285                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3286                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3287                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3288                                 PIPE_CONFIG(ADDR_SURF_P2) |
3289                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3290                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3291                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3292                                 PIPE_CONFIG(ADDR_SURF_P2) |
3293                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3294                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3295                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3296                                 PIPE_CONFIG(ADDR_SURF_P2) |
3297                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3298                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3299                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3300                                 PIPE_CONFIG(ADDR_SURF_P2) |
3301                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3302                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3303                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3304                                 PIPE_CONFIG(ADDR_SURF_P2) |
3305                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3306                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3307                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3308                                 PIPE_CONFIG(ADDR_SURF_P2) |
3309                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3310                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3311                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3312                                 PIPE_CONFIG(ADDR_SURF_P2));
3313                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3314                                 PIPE_CONFIG(ADDR_SURF_P2) |
3315                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3316                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3317                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3318                                  PIPE_CONFIG(ADDR_SURF_P2) |
3319                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3320                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3321                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3322                                  PIPE_CONFIG(ADDR_SURF_P2) |
3323                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3324                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3325                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3326                                  PIPE_CONFIG(ADDR_SURF_P2) |
3327                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3328                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3329                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3330                                  PIPE_CONFIG(ADDR_SURF_P2) |
3331                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3332                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3333                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3334                                  PIPE_CONFIG(ADDR_SURF_P2) |
3335                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3336                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3337                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3338                                  PIPE_CONFIG(ADDR_SURF_P2) |
3339                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3340                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3341                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3342                                  PIPE_CONFIG(ADDR_SURF_P2) |
3343                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3344                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3345                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3346                                  PIPE_CONFIG(ADDR_SURF_P2) |
3347                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3348                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3349                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3350                                  PIPE_CONFIG(ADDR_SURF_P2) |
3351                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3352                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3353                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3354                                  PIPE_CONFIG(ADDR_SURF_P2) |
3355                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3356                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3357                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3358                                  PIPE_CONFIG(ADDR_SURF_P2) |
3359                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3360                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3361                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3362                                  PIPE_CONFIG(ADDR_SURF_P2) |
3363                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3364                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3365                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3366                                  PIPE_CONFIG(ADDR_SURF_P2) |
3367                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3368                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3369                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3370                                  PIPE_CONFIG(ADDR_SURF_P2) |
3371                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3372                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3373                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3374                                  PIPE_CONFIG(ADDR_SURF_P2) |
3375                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3376                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3377                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3378                                  PIPE_CONFIG(ADDR_SURF_P2) |
3379                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3380                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3381                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3382                                  PIPE_CONFIG(ADDR_SURF_P2) |
3383                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3384                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3385
3386                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3387                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3388                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3389                                 NUM_BANKS(ADDR_SURF_8_BANK));
3390                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3391                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3392                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3393                                 NUM_BANKS(ADDR_SURF_8_BANK));
3394                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3395                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3396                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3397                                 NUM_BANKS(ADDR_SURF_8_BANK));
3398                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3399                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3400                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3401                                 NUM_BANKS(ADDR_SURF_8_BANK));
3402                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3403                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3404                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3405                                 NUM_BANKS(ADDR_SURF_8_BANK));
3406                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3407                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3408                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3409                                 NUM_BANKS(ADDR_SURF_8_BANK));
3410                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3411                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3412                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3413                                 NUM_BANKS(ADDR_SURF_8_BANK));
3414                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3415                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3416                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3417                                 NUM_BANKS(ADDR_SURF_16_BANK));
3418                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3419                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3420                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3421                                 NUM_BANKS(ADDR_SURF_16_BANK));
3422                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3423                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3424                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3425                                  NUM_BANKS(ADDR_SURF_16_BANK));
3426                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3427                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3428                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3429                                  NUM_BANKS(ADDR_SURF_16_BANK));
3430                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3431                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3432                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3433                                  NUM_BANKS(ADDR_SURF_16_BANK));
3434                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3435                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3436                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3437                                  NUM_BANKS(ADDR_SURF_16_BANK));
3438                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3439                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3440                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3441                                  NUM_BANKS(ADDR_SURF_8_BANK));
3442
3443                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3444                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3445                             reg_offset != 23)
3446                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3447
3448                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3449                         if (reg_offset != 7)
3450                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3451
3452                 break;
3453         }
3454 }
3455
3456 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3457                                   u32 se_num, u32 sh_num, u32 instance)
3458 {
3459         u32 data;
3460
3461         if (instance == 0xffffffff)
3462                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3463         else
3464                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3465
3466         if (se_num == 0xffffffff)
3467                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3468         else
3469                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3470
3471         if (sh_num == 0xffffffff)
3472                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3473         else
3474                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3475
3476         WREG32(mmGRBM_GFX_INDEX, data);
3477 }
3478
3479 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3480 {
3481         u32 data, mask;
3482
3483         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3484                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3485
3486         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3487
3488         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3489                                          adev->gfx.config.max_sh_per_se);
3490
3491         return (~data) & mask;
3492 }
3493
3494 static void
3495 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3496 {
3497         switch (adev->asic_type) {
3498         case CHIP_FIJI:
3499                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3500                           RB_XSEL2(1) | PKR_MAP(2) |
3501                           PKR_XSEL(1) | PKR_YSEL(1) |
3502                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3503                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3504                            SE_PAIR_YSEL(2);
3505                 break;
3506         case CHIP_TONGA:
3507         case CHIP_POLARIS10:
3508                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3509                           SE_XSEL(1) | SE_YSEL(1);
3510                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3511                            SE_PAIR_YSEL(2);
3512                 break;
3513         case CHIP_TOPAZ:
3514         case CHIP_CARRIZO:
3515                 *rconf |= RB_MAP_PKR0(2);
3516                 *rconf1 |= 0x0;
3517                 break;
3518         case CHIP_POLARIS11:
3519         case CHIP_POLARIS12:
3520                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3521                           SE_XSEL(1) | SE_YSEL(1);
3522                 *rconf1 |= 0x0;
3523                 break;
3524         case CHIP_STONEY:
3525                 *rconf |= 0x0;
3526                 *rconf1 |= 0x0;
3527                 break;
3528         default:
3529                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3530                 break;
3531         }
3532 }
3533
/*
 * Program per-SE PA_SC_RASTER_CONFIG(_1) when some render backends are
 * harvested (disabled), remapping work away from missing RBs.
 *
 * @raster_config / @raster_config_1: default (unharvested) values.
 * @rb_mask: bitmap of RBs that are actually present/active.
 * @num_rb: number of RB pipes the mapping is computed for.
 *
 * NOTE(review): the remap values (RASTER_CONFIG_*_MAP_0/3) follow the
 * harvesting scheme shared with radeon/si; confirm against the GCN
 * raster config documentation before changing.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice the global RB bitmap into one contiguous mask per SE. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	/* Only 1/2/4 SEs and 1/2 SHs per SE are supported by this remap. */
	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If a whole SE pair is empty, redirect the SE_PAIR mapping. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		/* Index of this SE's pair (0/1 -> 0, 2/3 -> 2). */
		int idx = (se / 2) * 2;

		/* If one SE of the pair is empty, remap SE_MAP to the live one. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* Same idea one level down: remap PKR if a packer has no RBs. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			/* Remap RB_MAP_PKR0 when one RB of packer 0 is missing. */
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			/* And RB_MAP_PKR1 for packer 1, if it exists. */
			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3642
/*
 * Discover which render backends are active, program the raster
 * configuration (harvested or default), and cache the per-SE/SH
 * register values for userspace queries.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	/* GRBM_GFX_INDEX is shared state; serialize SE/SH selection. */
	mutex_lock(&adev->grbm_idx_mutex);
	/* Collect the active-RB bitmap of every SE/SH into one word. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/*
	 * No harvesting (all RBs active) -> broadcast the default config;
	 * otherwise compute and write per-SE harvested configs.
	 */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3699
/**
 * gfx_v8_0_init_compute_vmid - init the SH_MEM registers of the compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access, cache-coherent MTYPE. */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* SRBM selection is shared state; apply per-VMID under the mutex. */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 base > limit disables the APE1 aperture. */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3744
3745 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3746 {
3747         switch (adev->asic_type) {
3748         default:
3749                 adev->gfx.config.double_offchip_lds_buf = 1;
3750                 break;
3751         case CHIP_CARRIZO:
3752         case CHIP_STONEY:
3753                 adev->gfx.config.double_offchip_lds_buf = 0;
3754                 break;
3755         }
3756 }
3757
/*
 * One-time GFX block initialization: address config, tiling tables,
 * RB setup, per-VMID SH_MEM programming and SC FIFO sizing.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	/* Set GRBM read timeout and broadcast gb_addr_config to GB/HDP/DMIF. */
	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	/* Program SH_MEM_CONFIG/BASES for every VMID under the SRBM mutex. */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 (kernel/CP): uncached default MTYPE, bases 0. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* User VMIDs: non-coherent default MTYPE, shared
			 * aperture base (top 16 bits of the GPUVM address). */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->mc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		/* APE1 base > limit disables the APE1 aperture. */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	/* Equal arbitration priority for all four pipe order timestamps. */
	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3839
/*
 * Poll until the RLC serdes masters report idle: first the per-CU
 * master of every SE/SH, then the non-CU masters (SE/GC/TC0/TC1).
 * Each poll is bounded by adev->usec_timeout microseconds.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Now wait for the non-CU serdes masters to go idle as well. */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3869
3870 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3871                                                bool enable)
3872 {
3873         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3874
3875         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3876         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3877         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3878         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3879
3880         WREG32(mmCP_INT_CNTL_RING0, tmp);
3881 }
3882
3883 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3884 {
3885         /* csib */
3886         WREG32(mmRLC_CSIB_ADDR_HI,
3887                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3888         WREG32(mmRLC_CSIB_ADDR_LO,
3889                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3890         WREG32(mmRLC_CSIB_LENGTH,
3891                         adev->gfx.rlc.clear_state_size);
3892 }
3893
3894 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3895                                 int ind_offset,
3896                                 int list_size,
3897                                 int *unique_indices,
3898                                 int *indices_count,
3899                                 int max_indices,
3900                                 int *ind_start_offsets,
3901                                 int *offset_count,
3902                                 int max_offset)
3903 {
3904         int indices;
3905         bool new_entry = true;
3906
3907         for (; ind_offset < list_size; ind_offset++) {
3908
3909                 if (new_entry) {
3910                         new_entry = false;
3911                         ind_start_offsets[*offset_count] = ind_offset;
3912                         *offset_count = *offset_count + 1;
3913                         BUG_ON(*offset_count >= max_offset);
3914                 }
3915
3916                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3917                         new_entry = true;
3918                         continue;
3919                 }
3920
3921                 ind_offset += 2;
3922
3923                 /* look for the matching indice */
3924                 for (indices = 0;
3925                         indices < *indices_count;
3926                         indices++) {
3927                         if (unique_indices[indices] ==
3928                                 register_list_format[ind_offset])
3929                                 break;
3930                 }
3931
3932                 if (indices >= *indices_count) {
3933                         unique_indices[*indices_count] =
3934                                 register_list_format[ind_offset];
3935                         indices = *indices_count;
3936                         *indices_count = *indices_count + 1;
3937                         BUG_ON(*indices_count >= max_indices);
3938                 }
3939
3940                 register_list_format[ind_offset] = indices;
3941         }
3942 }
3943
/*
 * Upload the RLC save/restore lists: the direct register restore list
 * into ARAM, the (index-compacted) indirect list into GPM scratch, and
 * the unique index registers into the SRM index control registers.
 *
 * Returns 0 on success, -ENOMEM if the scratch copy of the format list
 * cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* Work on a copy: parsing rewrites the list in place. */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	/* Stream the direct restore list into ARAM (auto-incrementing). */
	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* List size is stored in units of dwords pairs (size >> 1). */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			/* Low 18 bits -> address reg, bits 20+ -> data reg;
			 * NOTE(review): packing per RLC ucode format —
			 * confirm against the firmware spec if changed. */
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
4007
/* Enable the RLC save/restore machine (sets RLC_SRM_CNTL.SRM_ENABLE). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4012
/*
 * Program the power-gating delay and threshold registers.  All values
 * are hardware-tuned magic constants; do not change them without
 * matching hardware documentation.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	/* CP ring-buffer write-pointer idle poll count */
	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	/* power up/down, command propagation and memory sleep delays */
	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	/* gfx-idle threshold before GRBM register state is saved */
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4029
/* Enable/disable SMU clock slow-down during power up (RLC_PG_CNTL field). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4035
/* Enable/disable SMU clock slow-down during power down (RLC_PG_CNTL field). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4041
/* Enable/disable CP power gating.  Note the register field is a
 * *disable* bit, so it is cleared (0) to enable power gating. */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4046
4047 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4048 {
4049         if ((adev->asic_type == CHIP_CARRIZO) ||
4050             (adev->asic_type == CHIP_STONEY)) {
4051                 gfx_v8_0_init_csb(adev);
4052                 gfx_v8_0_init_save_restore_list(adev);
4053                 gfx_v8_0_enable_save_restore_machine(adev);
4054                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4055                 gfx_v8_0_init_power_gating(adev);
4056                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4057         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4058                    (adev->asic_type == CHIP_POLARIS12)) {
4059                 gfx_v8_0_init_csb(adev);
4060                 gfx_v8_0_init_save_restore_list(adev);
4061                 gfx_v8_0_enable_save_restore_machine(adev);
4062                 gfx_v8_0_init_power_gating(adev);
4063         }
4064
4065 }
4066
/*
 * Halt the RLC: clear RLC_ENABLE_F32, disable the GUI idle interrupt
 * and wait for the RLC serdes to become idle.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4074
/*
 * Pulse the RLC soft-reset bit in GRBM_SOFT_RESET, with 50us settle
 * delays after both assertion and de-assertion.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4083
/*
 * Start the RLC F32 core.  The GUI idle interrupt is re-enabled here
 * on dGPUs only; APUs (e.g. Carrizo) enable it later, after the CP
 * has been initialized.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	/* give the RLC time to come up */
	udelay(50);
}
4094
/*
 * Load the RLC microcode image into the RLC GPM ucode RAM, one dword
 * at a time through the ADDR/DATA register pair.
 *
 * Returns 0 on success, -EINVAL if no RLC firmware has been loaded.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* ucode payload follows the header at the recorded offset */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	/* final ADDR write records the firmware version */
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
4118
/*
 * Full RLC (re)start sequence: stop the RLC, disable clock gating and
 * power gating, soft-reset the RLC, re-program the power-gating state,
 * load the RLC microcode when using legacy/direct firmware loading,
 * and finally start the RLC.
 *
 * Returns 0 on success or the error from gfx_v8_0_rlc_load_microcode().
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		/* also clear the low two bits of the 3D variant —
		 * presumably the 3D CGCG/CGLS enables; confirm vs docs */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);


	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		/* legacy rlc firmware loading */
		r = gfx_v8_0_rlc_load_microcode(adev);
		if (r)
			return r;
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4157
4158 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4159 {
4160         int i;
4161         u32 tmp = RREG32(mmCP_ME_CNTL);
4162
4163         if (enable) {
4164                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4165                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4166                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4167         } else {
4168                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4169                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4170                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4171                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4172                         adev->gfx.gfx_ring[i].ready = false;
4173         }
4174         WREG32(mmCP_ME_CNTL, tmp);
4175         udelay(50);
4176 }
4177
4178 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4179 {
4180         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4181         const struct gfx_firmware_header_v1_0 *ce_hdr;
4182         const struct gfx_firmware_header_v1_0 *me_hdr;
4183         const __le32 *fw_data;
4184         unsigned i, fw_size;
4185
4186         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4187                 return -EINVAL;
4188
4189         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4190                 adev->gfx.pfp_fw->data;
4191         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4192                 adev->gfx.ce_fw->data;
4193         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4194                 adev->gfx.me_fw->data;
4195
4196         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4197         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4198         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4199
4200         gfx_v8_0_cp_gfx_enable(adev, false);
4201
4202         /* PFP */
4203         fw_data = (const __le32 *)
4204                 (adev->gfx.pfp_fw->data +
4205                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4206         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4207         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4208         for (i = 0; i < fw_size; i++)
4209                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4210         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4211
4212         /* CE */
4213         fw_data = (const __le32 *)
4214                 (adev->gfx.ce_fw->data +
4215                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4216         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4217         WREG32(mmCP_CE_UCODE_ADDR, 0);
4218         for (i = 0; i < fw_size; i++)
4219                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4220         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4221
4222         /* ME */
4223         fw_data = (const __le32 *)
4224                 (adev->gfx.me_fw->data +
4225                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4226         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4227         WREG32(mmCP_ME_RAM_WADDR, 0);
4228         for (i = 0; i < fw_size; i++)
4229                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4230         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4231
4232         return 0;
4233 }
4234
4235 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4236 {
4237         u32 count = 0;
4238         const struct cs_section_def *sect = NULL;
4239         const struct cs_extent_def *ext = NULL;
4240
4241         /* begin clear state */
4242         count += 2;
4243         /* context control state */
4244         count += 3;
4245
4246         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4247                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4248                         if (sect->id == SECT_CONTEXT)
4249                                 count += 2 + ext->reg_count;
4250                         else
4251                                 return 0;
4252                 }
4253         }
4254         /* pa_sc_raster_config/pa_sc_raster_config1 */
4255         count += 4;
4256         /* end clear state */
4257         count += 2;
4258         /* clear state */
4259         count += 2;
4260
4261         return count;
4262 }
4263
/*
 * Initialize the gfx CP and emit the clear-state (CSB) initialization
 * sequence on the gfx ring, including per-ASIC PA_SC_RASTER_CONFIG
 * values and the CE partition setup.  Ring space is sized by
 * gfx_v8_0_get_csb_size() plus 4 dwords for the SET_BASE packet.
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent as a SET_CONTEXT_REG packet */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC values for pa_sc_raster_config/pa_sc_raster_config1 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		/* single-RB Topaz parts use a different raster config */
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		/* unreachable: all gfx v8 ASICs are enumerated above */
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
/*
 * Configure the gfx CP ring doorbell: enable or disable it in
 * CP_RB_DOORBELL_CONTROL according to ring->use_doorbell and, on
 * dGPUs only, program the doorbell aperture range registers.
 * Iceland (Topaz) has no gfx doorbells at all.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* the aperture range below is only programmed on dGPUs */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4389
/*
 * Program the gfx ring buffer (CP_RB0): size, read/write pointers,
 * write-back addresses, base address and doorbell — then start the CP
 * via gfx_v8_0_cp_gfx_start() and run a ring test.
 *
 * Returns 0 on success or the ring-test error; ring->ready reflects
 * the outcome.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers; the
	 * RPTR_WR_ENA bit is held while the pointers are programmed */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	/* let the pointer writes land before dropping RPTR_WR_ENA */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is programmed in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4447
4448 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4449 {
4450         int i;
4451
4452         if (enable) {
4453                 WREG32(mmCP_MEC_CNTL, 0);
4454         } else {
4455                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4456                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4457                         adev->gfx.compute_ring[i].ready = false;
4458                 adev->gfx.kiq.ring.ready = false;
4459         }
4460         udelay(50);
4461 }
4462
/*
 * Load MEC (compute micro engine) firmware.  MEC1 is always loaded;
 * MEC2 only when a separate MEC2 image is present.  The compute CP is
 * halted before its instruction RAM is written.
 *
 * Returns 0 on success, -EINVAL if the MEC1 firmware is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	/* ucode payload follows the header at the recorded offset */
	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4508
4509 /* KIQ functions */
/* Tell the RLC scheduler which me/pipe/queue is the KIQ. */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	/* deliberately written twice: first with just the queue id,
	 * then again with bit 7 set — NOTE(review): bit 7 looks like an
	 * activate/latch bit; confirm against RLC documentation */
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4523
/*
 * Use the KIQ to map all enabled compute queues (KCQs): emit one
 * SET_RESOURCES packet carrying the queue mask, then a MAP_QUEUES
 * packet per compute ring, and finally poll a scratch register that
 * the KIQ writes back (via SET_UCONFIG_REG) to confirm completion.
 *
 * Returns 0 on success, a scratch/ring-alloc error, or -EINVAL if the
 * KIQ never wrote the completion value within the usec timeout.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint32_t scratch, tmp = 0;
	uint64_t queue_mask = 0;
	int r, i;

	/* build the bitmask of all enabled MEC queues */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* sentinel value, overwritten by the KIQ on completion */
	WREG32(scratch, 0xCAFEDEAD);

	/* 8 dwords per MAP_QUEUES + 7 for SET_RESOURCES + 3 + 1 rounding */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}
	/* write to scratch for completion */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
	amdgpu_ring_commit(kiq_ring);

	/* poll until the KIQ has executed the final scratch write */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i >= adev->usec_timeout) {
		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);

	return r;
}
4609
/*
 * Request the currently selected HQD to dequeue (request type @req)
 * and wait for it to go inactive, then clear the dequeue request and
 * the queue read/write pointers.
 *
 * NOTE(review): this operates on the currently mapped HQD register
 * bank — the caller presumably selects the target me/pipe/queue
 * beforehand; confirm at call sites.
 *
 * Returns 0 on success, -ETIMEDOUT if the HQD stayed active.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	/* clear the request and reset the queue pointers regardless */
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4630
4631 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4632 {
4633         struct amdgpu_device *adev = ring->adev;
4634         struct vi_mqd *mqd = ring->mqd_ptr;
4635         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4636         uint32_t tmp;
4637
4638         mqd->header = 0xC0310800;
4639         mqd->compute_pipelinestat_enable = 0x00000001;
4640         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4641         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4642         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4643         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4644         mqd->compute_misc_reserved = 0x00000003;
4645         mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4646                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4647         mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4648                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4649         eop_base_addr = ring->eop_gpu_addr >> 8;
4650         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4651         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4652
4653         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4654         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4655         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4656                         (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4657
4658         mqd->cp_hqd_eop_control = tmp;
4659
4660         /* enable doorbell? */
4661         tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4662                             CP_HQD_PQ_DOORBELL_CONTROL,
4663                             DOORBELL_EN,
4664                             ring->use_doorbell ? 1 : 0);
4665
4666         mqd->cp_hqd_pq_doorbell_control = tmp;
4667
4668         /* set the pointer to the MQD */
4669         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4670         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4671
4672         /* set MQD vmid to 0 */
4673         tmp = RREG32(mmCP_MQD_CONTROL);
4674         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4675         mqd->cp_mqd_control = tmp;
4676
4677         /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4678         hqd_gpu_addr = ring->gpu_addr >> 8;
4679         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4680         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4681
4682         /* set up the HQD, this is similar to CP_RB0_CNTL */
4683         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4684         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4685                             (order_base_2(ring->ring_size / 4) - 1));
4686         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4687                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4688 #ifdef __BIG_ENDIAN
4689         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4690 #endif
4691         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4692         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4693         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4694         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4695         mqd->cp_hqd_pq_control = tmp;
4696
4697         /* set the wb address whether it's enabled or not */
4698         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4699         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4700         mqd->cp_hqd_pq_rptr_report_addr_hi =
4701                 upper_32_bits(wb_gpu_addr) & 0xffff;
4702
4703         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4704         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4705         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4706         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4707
4708         tmp = 0;
4709         /* enable the doorbell if requested */
4710         if (ring->use_doorbell) {
4711                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4712                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4713                                 DOORBELL_OFFSET, ring->doorbell_index);
4714
4715                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4716                                          DOORBELL_EN, 1);
4717                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4718                                          DOORBELL_SOURCE, 0);
4719                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4720                                          DOORBELL_HIT, 0);
4721         }
4722
4723         mqd->cp_hqd_pq_doorbell_control = tmp;
4724
4725         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4726         ring->wptr = 0;
4727         mqd->cp_hqd_pq_wptr = ring->wptr;
4728         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4729
4730         /* set the vmid for the queue */
4731         mqd->cp_hqd_vmid = 0;
4732
4733         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4734         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4735         mqd->cp_hqd_persistent_state = tmp;
4736
4737         /* set MTYPE */
4738         tmp = RREG32(mmCP_HQD_IB_CONTROL);
4739         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4740         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4741         mqd->cp_hqd_ib_control = tmp;
4742
4743         tmp = RREG32(mmCP_HQD_IQ_TIMER);
4744         tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4745         mqd->cp_hqd_iq_timer = tmp;
4746
4747         tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4748         tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4749         mqd->cp_hqd_ctx_save_control = tmp;
4750
4751         /* defaults */
4752         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4753         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4754         mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4755         mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4756         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4757         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4758         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4759         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4760         mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4761         mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4762         mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4763         mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4764         mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4765         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4766         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4767
4768         /* activate the queue */
4769         mqd->cp_hqd_active = 1;
4770
4771         return 0;
4772 }
4773
/* Write a prepared MQD image into the HQD registers of the currently
 * selected queue and activate it.  The caller must hold srbm_mutex and
 * have selected the target me/pipe/queue via vi_srbm_select().
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
                        struct vi_mqd *mqd)
{
        uint32_t mqd_reg;
        uint32_t *mqd_data;

        /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
        mqd_data = &mqd->cp_mqd_base_addr_lo;

        /* disable wptr polling */
        WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

        /* program all HQD registers */
        for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
         * This is safe since EOP RPTR==WPTR for any inactive HQD
         * on ASICs that do not support context-save.
         * EOP writes/reads can start anywhere in the ring.
         */
        if (adev->asic_type != CHIP_TONGA) {
                WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
                WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
                WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
        }

        /* remaining registers after the EOP RPTR/WPTR gap */
        for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        /* activate the HQD */
        for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
                WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

        return 0;
}
4810
/* Build (or restore) and commit the KIQ's MQD.
 * Fresh init: zero the MQD, build it, commit it, and save a backup copy.
 * SR-IOV GPU reset: restore the backup instead of rebuilding, then
 * re-commit so hardware state matches the pre-reset queue.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        /* the KIQ backup slot sits one past the compute-ring slots */
        int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

        gfx_v8_0_kiq_setting(ring);

        if (adev->in_sriov_reset) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

                /* reset ring buffer */
                ring->wptr = 0;
                amdgpu_ring_clear_ring(ring);
                /* HQD programming requires srbm_mutex with the KIQ's
                 * me/pipe/queue selected */
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_commit(adev, mqd);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);
        } else {
                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
                /* all bits set in the dynamic CU/RB masks */
                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_init(ring);
                gfx_v8_0_mqd_commit(adev, mqd);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                /* keep a backup so a later SR-IOV reset can restore it */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
        }

        return 0;
}
4849
/* Build (or restore) the MQD for one compute queue (KCQ).
 * Unlike the KIQ path, the MQD is only constructed here; it is not
 * committed via direct register writes (NOTE(review): presumably the
 * KIQ maps it later -- see gfx_v8_0_kiq_kcq_enable in the resume path).
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct vi_mqd *mqd = ring->mqd_ptr;
        /* backup slot index == position within the compute_ring array */
        int mqd_idx = ring - &adev->gfx.compute_ring[0];

        if (!adev->in_sriov_reset && !adev->gfx.in_suspend) {
                /* cold init: build the MQD from scratch and back it up */
                memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
                /* all bits set in the dynamic CU/RB masks */
                ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
                ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
                mutex_lock(&adev->srbm_mutex);
                vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v8_0_mqd_init(ring);
                vi_srbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
        } else if (adev->in_sriov_reset) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
                /* reset ring buffer */
                ring->wptr = 0;
                amdgpu_ring_clear_ring(ring);
        } else {
                /* resume from suspend: MQD is kept, just clear the ring */
                amdgpu_ring_clear_ring(ring);
        }
        return 0;
}
4880
/* Program the MEC doorbell aperture (KIQ through MEC ring 7) and enable
 * compute-queue doorbells.
 * NOTE(review): the RANGE registers are only written on ASICs newer
 * than Tonga -- presumably absent or unneeded on older parts; confirm
 * against the register spec.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
        if (adev->asic_type > CHIP_TONGA) {
                WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
                WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
        }
        /* enable doorbells */
        WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4890
/* Bring up the KIQ and all compute queues: enable the compute CP,
 * initialize each ring's MQD (mapping its BO only while programming it),
 * open the doorbell range, let the KIQ map the KCQs, then ring-test
 * everything.  Returns 0 on success or the first error encountered.
 */
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
        struct amdgpu_ring *ring = NULL;
        int r = 0, i;

        gfx_v8_0_cp_compute_enable(adev, true);

        /* KIQ first: its MQD is committed directly via registers */
        ring = &adev->gfx.kiq.ring;

        r = amdgpu_bo_reserve(ring->mqd_obj, false);
        if (unlikely(r != 0))
                goto done;

        r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
        if (!r) {
                r = gfx_v8_0_kiq_init_queue(ring);
                amdgpu_bo_kunmap(ring->mqd_obj);
                ring->mqd_ptr = NULL;
        }
        amdgpu_bo_unreserve(ring->mqd_obj);
        if (r)
                goto done;

        /* build the MQD of every compute ring */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                ring = &adev->gfx.compute_ring[i];

                r = amdgpu_bo_reserve(ring->mqd_obj, false);
                if (unlikely(r != 0))
                        goto done;
                r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
                if (!r) {
                        r = gfx_v8_0_kcq_init_queue(ring);
                        amdgpu_bo_kunmap(ring->mqd_obj);
                        ring->mqd_ptr = NULL;
                }
                amdgpu_bo_unreserve(ring->mqd_obj);
                if (r)
                        goto done;
        }

        gfx_v8_0_set_mec_doorbell_range(adev);

        /* have the KIQ map the compute queues */
        r = gfx_v8_0_kiq_kcq_enable(adev);
        if (r)
                goto done;

        /* Test KIQ */
        ring = &adev->gfx.kiq.ring;
        ring->ready = true;
        r = amdgpu_ring_test_ring(ring);
        if (r) {
                ring->ready = false;
                goto done;
        }

        /* Test KCQs; a failed KCQ is marked not-ready but does not
         * abort the whole resume */
        for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                ring = &adev->gfx.compute_ring[i];
                ring->ready = true;
                r = amdgpu_ring_test_ring(ring);
                if (r)
                        ring->ready = false;
        }

done:
        return r;
}
4958
4959 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4960 {
4961         int r;
4962
4963         if (!(adev->flags & AMD_IS_APU))
4964                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4965
4966         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4967                         /* legacy firmware loading */
4968                 r = gfx_v8_0_cp_gfx_load_microcode(adev);
4969                 if (r)
4970                         return r;
4971
4972                 r = gfx_v8_0_cp_compute_load_microcode(adev);
4973                 if (r)
4974                         return r;
4975         }
4976
4977         r = gfx_v8_0_cp_gfx_resume(adev);
4978         if (r)
4979                 return r;
4980
4981         r = gfx_v8_0_kiq_resume(adev);
4982         if (r)
4983                 return r;
4984
4985         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4986
4987         return 0;
4988 }
4989
/* Enable or disable both command processors (gfx then compute). */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
        gfx_v8_0_cp_gfx_enable(adev, enable);
        gfx_v8_0_cp_compute_enable(adev, enable);
}
4995
/* hw_init IP callback: apply golden register settings, initialize the
 * gfx core, then resume the RLC followed by the command processors.
 */
static int gfx_v8_0_hw_init(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int r;

        gfx_v8_0_init_golden_registers(adev);
        gfx_v8_0_gpu_init(adev);

        r = gfx_v8_0_rlc_resume(adev);
        if (r)
                return r;

        return gfx_v8_0_cp_resume(adev);
}
5012
/* Ask the KIQ to unmap (reset) one compute queue.  Emits a PM4
 * UNMAP_QUEUES packet on the KIQ ring followed by a scratch-register
 * write used as a completion fence, then busy-polls the scratch
 * register.  Returns 0 on success, -EINVAL on poll timeout, or a
 * negative error from scratch/ring allocation.
 */
static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = kiq_ring->adev;
        uint32_t scratch, tmp = 0;
        int r, i;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("Failed to get scratch reg (%d).\n", r);
                return r;
        }
        /* sentinel value; overwritten with 0xDEADBEEF on completion */
        WREG32(scratch, 0xCAFEDEAD);

        /* 6 dwords for UNMAP_QUEUES + 3 for the fence write */
        r = amdgpu_ring_alloc(kiq_ring, 10);
        if (r) {
                DRM_ERROR("Failed to lock KIQ (%d).\n", r);
                amdgpu_gfx_scratch_free(adev, scratch);
                return r;
        }

        /* unmap queues */
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
        amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
                                                PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
                                                PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
                                                PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
                                                PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
        /* the queue is identified by its doorbell offset */
        amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
        amdgpu_ring_write(kiq_ring, 0);
        amdgpu_ring_write(kiq_ring, 0);
        amdgpu_ring_write(kiq_ring, 0);
        /* write to scratch for completion */
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
        amdgpu_ring_commit(kiq_ring);

        /* poll up to usec_timeout microseconds for the fence value */
        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i >= adev->usec_timeout) {
                DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
                r = -EINVAL;
        }
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}
5063
/* hw_fini IP callback: tear down the gfx block.  Compute queues are
 * unmapped through the KIQ first so the CPC stops touching memory that
 * is about to become invalid; under SR-IOV the host owns the rest of
 * the teardown, so we return early.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        int i;

        amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
        amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

        /* disable KCQ to avoid CPC touch memory not valid anymore */
        /* NOTE(review): per-queue unmap errors are ignored here */
        for (i = 0; i < adev->gfx.num_compute_rings; i++)
                gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);

        if (amdgpu_sriov_vf(adev)) {
                pr_debug("For SRIOV client, shouldn't do anything.\n");
                return 0;
        }
        gfx_v8_0_cp_enable(adev, false);
        gfx_v8_0_rlc_stop(adev);

        /* disable gfx powergating for the powered-down state */
        amdgpu_set_powergating_state(adev,
                        AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

        return 0;
}
5088
5089 static int gfx_v8_0_suspend(void *handle)
5090 {
5091         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5092         adev->gfx.in_suspend = true;
5093         return gfx_v8_0_hw_fini(adev);
5094 }
5095
5096 static int gfx_v8_0_resume(void *handle)
5097 {
5098         int r;
5099         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5100
5101         r = gfx_v8_0_hw_init(adev);
5102         adev->gfx.in_suspend = false;
5103         return r;
5104 }
5105
5106 static bool gfx_v8_0_is_idle(void *handle)
5107 {
5108         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5109
5110         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5111                 return false;
5112         else
5113                 return true;
5114 }
5115
5116 static int gfx_v8_0_wait_for_idle(void *handle)
5117 {
5118         unsigned i;
5119         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5120
5121         for (i = 0; i < adev->usec_timeout; i++) {
5122                 if (gfx_v8_0_is_idle(handle))
5123                         return 0;
5124
5125                 udelay(1);
5126         }
5127         return -ETIMEDOUT;
5128 }
5129
/* check_soft_reset IP callback: translate busy bits in GRBM_STATUS,
 * GRBM_STATUS2 and SRBM_STATUS into GRBM/SRBM soft-reset masks, cache
 * the masks in adev->gfx for the pre/soft/post reset callbacks, and
 * return true when a reset is needed.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        /* GRBM_STATUS */
        tmp = RREG32(mmGRBM_STATUS);
        if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
                   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
                   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
                   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
                   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
                   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
                   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
                /* any stuck gfx pipeline unit -> reset CP + GFX + GRBM */
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        }

        /* GRBM_STATUS2 */
        tmp = RREG32(mmGRBM_STATUS2);
        if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
                                                GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

        /* any busy CP front-end (fetcher/compute/gfx) -> reset them all */
        if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
            REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPF, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPC, 1);
                grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
                                                SOFT_RESET_CPG, 1);
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
                                                SOFT_RESET_GRBM, 1);
        }

        /* SRBM_STATUS */
        tmp = RREG32(mmSRBM_STATUS);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
        if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
                                                SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

        /* cache the masks for the later reset stages */
        if (grbm_soft_reset || srbm_soft_reset) {
                adev->gfx.grbm_soft_reset = grbm_soft_reset;
                adev->gfx.srbm_soft_reset = srbm_soft_reset;
                return true;
        } else {
                adev->gfx.grbm_soft_reset = 0;
                adev->gfx.srbm_soft_reset = 0;
                return false;
        }
}
5191
/* pre_soft_reset IP callback: quiesce the gfx block before the reset
 * recorded by gfx_v8_0_check_soft_reset().  Stops the RLC, halts the
 * gfx CP when it is being reset, and deactivates every compute HQD and
 * halts the MEC when the compute CP is being reset.
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

        /* nothing to do if check_soft_reset found no work */
        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        /* stop the rlc */
        gfx_v8_0_rlc_stop(adev);

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
                /* Disable GFX parsing/prefetching */
                gfx_v8_0_cp_gfx_enable(adev, false);

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
                int i;

                /* deactivate each compute queue's HQD under srbm select */
                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

                        mutex_lock(&adev->srbm_mutex);
                        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                        gfx_v8_0_deactivate_hqd(adev, 2);
                        vi_srbm_select(adev, 0, 0, 0, 0);
                        mutex_unlock(&adev->srbm_mutex);
                }
                /* Disable MEC parsing/prefetching */
                gfx_v8_0_cp_compute_enable(adev, false);
        }

       return 0;
}
5233
/* soft_reset IP callback: perform the reset recorded by
 * gfx_v8_0_check_soft_reset().  Stalls the GMCON gfx path, pulses the
 * GRBM and SRBM soft-reset bits (set, settle, clear, reading the
 * register back after each write -- presumably to post the write),
 * then releases the GMCON stall.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;

        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        /* stall GFX memory traffic and arm the clear before resetting */
        if (grbm_soft_reset || srbm_soft_reset) {
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
                WREG32(mmGMCON_DEBUG, tmp);
                udelay(50);
        }

        if (grbm_soft_reset) {
                /* assert the GRBM reset bits */
                tmp = RREG32(mmGRBM_SOFT_RESET);
                tmp |= grbm_soft_reset;
                dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmGRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmGRBM_SOFT_RESET);

                udelay(50);

                /* de-assert */
                tmp &= ~grbm_soft_reset;
                WREG32(mmGRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmGRBM_SOFT_RESET);
        }

        if (srbm_soft_reset) {
                /* assert the SRBM reset bits */
                tmp = RREG32(mmSRBM_SOFT_RESET);
                tmp |= srbm_soft_reset;
                dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);

                udelay(50);

                /* de-assert */
                tmp &= ~srbm_soft_reset;
                WREG32(mmSRBM_SOFT_RESET, tmp);
                tmp = RREG32(mmSRBM_SOFT_RESET);
        }

        /* release the GMCON stall/clear */
        if (grbm_soft_reset || srbm_soft_reset) {
                tmp = RREG32(mmGMCON_DEBUG);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
                tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
                WREG32(mmGMCON_DEBUG, tmp);
        }

        /* Wait a little for things to settle down */
        udelay(50);

        return 0;
}
5295
/* post_soft_reset IP callback: bring the gfx block back after a soft
 * reset -- resume whichever CPs were reset (deactivating stale compute
 * HQDs before restarting the KIQ path), then restart the RLC.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

        /* nothing to do if check_soft_reset found no work */
        if ((!adev->gfx.grbm_soft_reset) &&
            (!adev->gfx.srbm_soft_reset))
                return 0;

        grbm_soft_reset = adev->gfx.grbm_soft_reset;
        srbm_soft_reset = adev->gfx.srbm_soft_reset;

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
                gfx_v8_0_cp_gfx_resume(adev);

        if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
            REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
                int i;

                /* make sure no HQD is left active from before the reset */
                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

                        mutex_lock(&adev->srbm_mutex);
                        vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                        gfx_v8_0_deactivate_hqd(adev, 2);
                        vi_srbm_select(adev, 0, 0, 0, 0);
                        mutex_unlock(&adev->srbm_mutex);
                }
                gfx_v8_0_kiq_resume(adev);
        }
        gfx_v8_0_rlc_start(adev);

        return 0;
}
5333
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
        uint64_t clock;

        /* latch the counter, then read LSB/MSB; gpu_clock_mutex keeps the
         * capture and the two reads from interleaving with another caller */
        mutex_lock(&adev->gfx.gpu_clock_mutex);
        WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
        clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
                ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
        mutex_unlock(&adev->gfx.gpu_clock_mutex);
        return clock;
}
5353
/* Emit PM4 WRITE_DATA packets that program the per-VMID GDS partition
 * registers: GDS base/size, GWS base/size and the OA mask.  The raw
 * byte values are first converted to register units via the
 * AMDGPU_*_SHIFT constants.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
                                          uint32_t vmid,
                                          uint32_t gds_base, uint32_t gds_size,
                                          uint32_t gws_base, uint32_t gws_size,
                                          uint32_t oa_base, uint32_t oa_size)
{
        gds_base = gds_base >> AMDGPU_GDS_SHIFT;
        gds_size = gds_size >> AMDGPU_GDS_SHIFT;

        gws_base = gws_base >> AMDGPU_GWS_SHIFT;
        gws_size = gws_size >> AMDGPU_GWS_SHIFT;

        oa_base = oa_base >> AMDGPU_OA_SHIFT;
        oa_size = oa_size >> AMDGPU_OA_SHIFT;

        /* GDS Base */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gds_base);

        /* GDS Size */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gds_size);

        /* GWS: size and base packed into one register */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

        /* OA: contiguous mask of oa_size bits starting at bit oa_base */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                WRITE_DATA_DST_SEL(0)));
        amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5401
5402 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5403 {
5404         WREG32(mmSQ_IND_INDEX,
5405                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5406                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5407                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5408                 (SQ_IND_INDEX__FORCE_READ_MASK));
5409         return RREG32(mmSQ_IND_DATA);
5410 }
5411
5412 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5413                            uint32_t wave, uint32_t thread,
5414                            uint32_t regno, uint32_t num, uint32_t *out)
5415 {
5416         WREG32(mmSQ_IND_INDEX,
5417                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5418                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5419                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5420                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5421                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5422                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5423         while (num--)
5424                 *(out++) = RREG32(mmSQ_IND_DATA);
5425 }
5426
/* Snapshot the state registers of one wave (status, PC, EXEC mask,
 * allocation info, trap/IB status, TBA/TMA, M0, ...) into dst,
 * incrementing *no_fields for each entry written.  The leading 0 marks
 * the record as "type 0 wave data".
 */
static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
        /* type 0 wave data */
        dst[(*no_fields)++] = 0;
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
        dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}
5450
5451 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5452                                      uint32_t wave, uint32_t start,
5453                                      uint32_t size, uint32_t *dst)
5454 {
5455         wave_read_regs(
5456                 adev, simd, wave, 0,
5457                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5458 }
5459
5460
/* gfx-v8 callbacks exposed to the amdgpu core (clock query, SE/SH
 * selection and wave-state debug reads). */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
        .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
        .select_se_sh = &gfx_v8_0_select_se_sh,
        .read_wave_data = &gfx_v8_0_read_wave_data,
        .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
5467
5468 static int gfx_v8_0_early_init(void *handle)
5469 {
5470         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5471
5472         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5473         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5474         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5475         gfx_v8_0_set_ring_funcs(adev);
5476         gfx_v8_0_set_irq_funcs(adev);
5477         gfx_v8_0_set_gds_init(adev);
5478         gfx_v8_0_set_rlc_funcs(adev);
5479
5480         return 0;
5481 }
5482
5483 static int gfx_v8_0_late_init(void *handle)
5484 {
5485         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5486         int r;
5487
5488         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5489         if (r)
5490                 return r;
5491
5492         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5493         if (r)
5494                 return r;
5495
5496         /* requires IBs so do in late init after IB pool is initialized */
5497         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5498         if (r)
5499                 return r;
5500
5501         amdgpu_set_powergating_state(adev,
5502                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5503
5504         return 0;
5505 }
5506
5507 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5508                                                        bool enable)
5509 {
5510         if ((adev->asic_type == CHIP_POLARIS11) ||
5511             (adev->asic_type == CHIP_POLARIS12))
5512                 /* Send msg to SMU via Powerplay */
5513                 amdgpu_set_powergating_state(adev,
5514                                              AMD_IP_BLOCK_TYPE_SMC,
5515                                              enable ?
5516                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5517
5518         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5519 }
5520
5521 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5522                                                         bool enable)
5523 {
5524         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5525 }
5526
5527 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5528                 bool enable)
5529 {
5530         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5531 }
5532
5533 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5534                                           bool enable)
5535 {
5536         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5537 }
5538
5539 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5540                                                 bool enable)
5541 {
5542         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5543
5544         /* Read any GFX register to wake up GFX. */
5545         if (!enable)
5546                 RREG32(mmDB_RENDER_CONTROL);
5547 }
5548
5549 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5550                                           bool enable)
5551 {
5552         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5553                 cz_enable_gfx_cg_power_gating(adev, true);
5554                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5555                         cz_enable_gfx_pipeline_power_gating(adev, true);
5556         } else {
5557                 cz_enable_gfx_cg_power_gating(adev, false);
5558                 cz_enable_gfx_pipeline_power_gating(adev, false);
5559         }
5560 }
5561
5562 static int gfx_v8_0_set_powergating_state(void *handle,
5563                                           enum amd_powergating_state state)
5564 {
5565         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5566         bool enable = (state == AMD_PG_STATE_GATE);
5567
5568         if (amdgpu_sriov_vf(adev))
5569                 return 0;
5570
5571         switch (adev->asic_type) {
5572         case CHIP_CARRIZO:
5573         case CHIP_STONEY:
5574
5575                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5576                         cz_enable_sck_slow_down_on_power_up(adev, true);
5577                         cz_enable_sck_slow_down_on_power_down(adev, true);
5578                 } else {
5579                         cz_enable_sck_slow_down_on_power_up(adev, false);
5580                         cz_enable_sck_slow_down_on_power_down(adev, false);
5581                 }
5582                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5583                         cz_enable_cp_power_gating(adev, true);
5584                 else
5585                         cz_enable_cp_power_gating(adev, false);
5586
5587                 cz_update_gfx_cg_power_gating(adev, enable);
5588
5589                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5590                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5591                 else
5592                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5593
5594                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5595                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5596                 else
5597                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5598                 break;
5599         case CHIP_POLARIS11:
5600         case CHIP_POLARIS12:
5601                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5602                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5603                 else
5604                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5605
5606                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5607                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5608                 else
5609                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5610
5611                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5612                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5613                 else
5614                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5615                 break;
5616         default:
5617                 break;
5618         }
5619
5620         return 0;
5621 }
5622
/* gfx_v8_0_get_clockgating_state - report which GFX clockgating features
 * are currently active by sampling the relevant registers.
 * @handle: amdgpu_device pointer (passed as void *)
 * @flags: AMD_CG_SUPPORT_GFX_* bits are OR'ed into this
 *
 * NOTE(review): under SR-IOV *flags is zeroed, but the register reads
 * below still execute and may OR bits back in -- confirm intentional.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG: active when the CPF override is clear */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGLG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS (same register as CGCG above) */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS: active when the override is clear */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS (RLC light sleep implies MGLS) */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS (CP light sleep implies MGLS) */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5664
/* gfx_v8_0_send_serdes_cmd - broadcast a BPM serdes command to all CUs.
 * @adev: amdgpu device
 * @reg_addr: BPM register address to target
 * @cmd: BPM command/data value
 *
 * Selects every SE/SH, enables all CU and non-CU serdes masters, then
 * programs RLC_SERDES_WR_CTRL with the command.  The Stoney path clears
 * fewer fields -- presumably BPM_DATA/REG_ADDR are absent or laid out
 * differently there; confirm against the Stoney register spec.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to all SEs/SHs */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* command + target register + 0xff broadcast BPM address */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5705
/* RLC safe-mode message IDs and RLC_GPR_REG2 field definitions.
 * NOTE(review): the RLC_GPR_REG2 fields are not referenced by the
 * safe-mode code visible here (it uses mmRLC_SAFE_MODE directly) --
 * possibly leftovers from an older entry mechanism; confirm before use.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5712
/* iceland_enter_rlc_safe_mode - request RLC safe mode and wait for entry.
 *
 * No-op when the RLC F32 core is not running or when neither CGCG nor
 * MGCG is enabled.  Writes CMD plus message 1 (enter) to RLC_SAFE_MODE,
 * polls until GFX clock and power status both report on, then polls for
 * the RLC to acknowledge by clearing CMD.  Both polls time out silently
 * after adev->usec_timeout iterations.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* CMD=1, MESSAGE=1 -> enter safe mode */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for GFX clocks and power to be fully on */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack by clearing CMD */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5746
/* iceland_exit_rlc_safe_mode - send the exit message and wait for ack.
 *
 * Mirrors iceland_enter_rlc_safe_mode: CMD=1 with MESSAGE=0 requests
 * exit.  NOTE(review): the final CMD-clear poll runs even when safe mode
 * was never entered (it should fall through on the first read) --
 * confirm that is intentional rather than moving it inside the if.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* CMD=1, MESSAGE=0 -> exit safe mode */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to ack by clearing CMD */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5771
/* RLC safe-mode hooks shared by all VI parts (named after the first
 * ASIC that used this handshake). */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5776
/* gfx_v8_0_update_medium_grain_clock_gating - enable/disable MGCG, MGLS
 * and CGTS (tree shade) gating following the numbered hardware sequence.
 * @adev: amdgpu device
 * @enable: true to enable the supported features, false to disable all
 *
 * The whole sequence runs under RLC safe mode.  Statement order matters:
 * overrides are cleared/set via serdes commands only after the serdes
 * masters are idle.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			/* APUs keep the GRBM override in place */
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5880
/* gfx_v8_0_update_coarse_grain_clock_gating - enable/disable CGCG/CGLS.
 * @adev: amdgpu device
 * @enable: true to enable (if supported by cg_flags), false to disable
 *
 * Runs the serdes override handshake under RLC safe mode.  On disable,
 * GUI-idle interrupts are turned off for the duration and the repeated
 * CB register reads wake the block out of coarse gating before the
 * overrides are touched.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5973 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5974                                             bool enable)
5975 {
5976         if (enable) {
5977                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5978                  * ===  MGCG + MGLS + TS(CG/LS) ===
5979                  */
5980                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5981                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5982         } else {
5983                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5984                  * ===  CGCG + CGLS ===
5985                  */
5986                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5987                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5988         }
5989         return 0;
5990 }
5991
5992 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5993                                           enum amd_clockgating_state state)
5994 {
5995         uint32_t msg_id, pp_state = 0;
5996         uint32_t pp_support_state = 0;
5997
5998         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5999                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6000                         pp_support_state = PP_STATE_SUPPORT_LS;
6001                         pp_state = PP_STATE_LS;
6002                 }
6003                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6004                         pp_support_state |= PP_STATE_SUPPORT_CG;
6005                         pp_state |= PP_STATE_CG;
6006                 }
6007                 if (state == AMD_CG_STATE_UNGATE)
6008                         pp_state = 0;
6009
6010                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6011                                 PP_BLOCK_GFX_CG,
6012                                 pp_support_state,
6013                                 pp_state);
6014                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6015                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6016         }
6017
6018         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6019                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6020                         pp_support_state = PP_STATE_SUPPORT_LS;
6021                         pp_state = PP_STATE_LS;
6022                 }
6023
6024                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6025                         pp_support_state |= PP_STATE_SUPPORT_CG;
6026                         pp_state |= PP_STATE_CG;
6027                 }
6028
6029                 if (state == AMD_CG_STATE_UNGATE)
6030                         pp_state = 0;
6031
6032                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6033                                 PP_BLOCK_GFX_MG,
6034                                 pp_support_state,
6035                                 pp_state);
6036                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6037                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6038         }
6039
6040         return 0;
6041 }
6042
6043 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6044                                           enum amd_clockgating_state state)
6045 {
6046
6047         uint32_t msg_id, pp_state = 0;
6048         uint32_t pp_support_state = 0;
6049
6050         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6051                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6052                         pp_support_state = PP_STATE_SUPPORT_LS;
6053                         pp_state = PP_STATE_LS;
6054                 }
6055                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6056                         pp_support_state |= PP_STATE_SUPPORT_CG;
6057                         pp_state |= PP_STATE_CG;
6058                 }
6059                 if (state == AMD_CG_STATE_UNGATE)
6060                         pp_state = 0;
6061
6062                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6063                                 PP_BLOCK_GFX_CG,
6064                                 pp_support_state,
6065                                 pp_state);
6066                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6067                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6068         }
6069
6070         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6071                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6072                         pp_support_state = PP_STATE_SUPPORT_LS;
6073                         pp_state = PP_STATE_LS;
6074                 }
6075                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6076                         pp_support_state |= PP_STATE_SUPPORT_CG;
6077                         pp_state |= PP_STATE_CG;
6078                 }
6079                 if (state == AMD_CG_STATE_UNGATE)
6080                         pp_state = 0;
6081
6082                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6083                                 PP_BLOCK_GFX_3D,
6084                                 pp_support_state,
6085                                 pp_state);
6086                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6087                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6088         }
6089
6090         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6091                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6092                         pp_support_state = PP_STATE_SUPPORT_LS;
6093                         pp_state = PP_STATE_LS;
6094                 }
6095
6096                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6097                         pp_support_state |= PP_STATE_SUPPORT_CG;
6098                         pp_state |= PP_STATE_CG;
6099                 }
6100
6101                 if (state == AMD_CG_STATE_UNGATE)
6102                         pp_state = 0;
6103
6104                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6105                                 PP_BLOCK_GFX_MG,
6106                                 pp_support_state,
6107                                 pp_state);
6108                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6109                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6110         }
6111
6112         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6113                 pp_support_state = PP_STATE_SUPPORT_LS;
6114
6115                 if (state == AMD_CG_STATE_UNGATE)
6116                         pp_state = 0;
6117                 else
6118                         pp_state = PP_STATE_LS;
6119
6120                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6121                                 PP_BLOCK_GFX_RLC,
6122                                 pp_support_state,
6123                                 pp_state);
6124                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6125                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6126         }
6127
6128         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6129                 pp_support_state = PP_STATE_SUPPORT_LS;
6130
6131                 if (state == AMD_CG_STATE_UNGATE)
6132                         pp_state = 0;
6133                 else
6134                         pp_state = PP_STATE_LS;
6135                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6136                         PP_BLOCK_GFX_CP,
6137                         pp_support_state,
6138                         pp_state);
6139                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6140                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6141         }
6142
6143         return 0;
6144 }
6145
6146 static int gfx_v8_0_set_clockgating_state(void *handle,
6147                                           enum amd_clockgating_state state)
6148 {
6149         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6150
6151         if (amdgpu_sriov_vf(adev))
6152                 return 0;
6153
6154         switch (adev->asic_type) {
6155         case CHIP_FIJI:
6156         case CHIP_CARRIZO:
6157         case CHIP_STONEY:
6158                 gfx_v8_0_update_gfx_clock_gating(adev,
6159                                                  state == AMD_CG_STATE_GATE);
6160                 break;
6161         case CHIP_TONGA:
6162                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6163                 break;
6164         case CHIP_POLARIS10:
6165         case CHIP_POLARIS11:
6166         case CHIP_POLARIS12:
6167                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6168                 break;
6169         default:
6170                 break;
6171         }
6172         return 0;
6173 }
6174
6175 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6176 {
6177         return ring->adev->wb.wb[ring->rptr_offs];
6178 }
6179
6180 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6181 {
6182         struct amdgpu_device *adev = ring->adev;
6183
6184         if (ring->use_doorbell)
6185                 /* XXX check if swapping is necessary on BE */
6186                 return ring->adev->wb.wb[ring->wptr_offs];
6187         else
6188                 return RREG32(mmCP_RB0_WPTR);
6189 }
6190
6191 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6192 {
6193         struct amdgpu_device *adev = ring->adev;
6194
6195         if (ring->use_doorbell) {
6196                 /* XXX check if swapping is necessary on BE */
6197                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6198                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6199         } else {
6200                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6201                 (void)RREG32(mmCP_RB0_WPTR);
6202         }
6203 }
6204
/*
 * Emit an HDP cache flush as a WAIT_REG_MEM write-wait-write sequence
 * on the GPU_HDP_FLUSH_REQ/DONE register pair, waiting until the flush
 * is acknowledged for this ring's engine.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		/* each MEC pipe has its own FLUSH_DONE bit, offset by pipe */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* only MEC1/MEC2 exist; anything else is invalid */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6237
/*
 * Flush the VGT: a VS partial flush event followed by a full VGT flush
 * event, each emitted as an EVENT_WRITE packet.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6248
6249
/*
 * Invalidate the HDP cache by emitting a confirmed CP write of 1 to
 * mmHDP_DEBUG0.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
6261
/*
 * Schedule an indirect buffer on the GFX ring.
 *
 * CE IBs use INDIRECT_BUFFER_CONST, DE IBs use INDIRECT_BUFFER.  Under
 * SR-IOV, preemptible IBs get the PRE_ENB bit and DE IBs are preceded
 * by DE metadata to support mid-command-buffer preemption.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB size in dwords, plus the target VM context id in bits 24+ */
	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6291
/*
 * Schedule an indirect buffer on a compute ring: a single
 * INDIRECT_BUFFER packet carrying the IB address, size and VM context.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6307
/*
 * Emit a fence on the GFX ring via EVENT_WRITE_EOP: flush TC/TCL1
 * caches, write @seq to @addr (32 or 64 bit depending on @flags) and
 * optionally raise an interrupt when done.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6328
/*
 * Wait for all previously submitted work on this ring to finish by
 * polling the fence memory until it equals the last emitted sequence
 * number.  On the GFX ring the wait runs on the PFP so command fetch
 * stalls as well.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6345
6346 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6347                                         unsigned vm_id, uint64_t pd_addr)
6348 {
6349         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6350
6351         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6352         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6353                                  WRITE_DATA_DST_SEL(0)) |
6354                                  WR_CONFIRM);
6355         if (vm_id < 8) {
6356                 amdgpu_ring_write(ring,
6357                                   (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6358         } else {
6359                 amdgpu_ring_write(ring,
6360                                   (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6361         }
6362         amdgpu_ring_write(ring, 0);
6363         amdgpu_ring_write(ring, pd_addr >> 12);
6364
6365         /* bits 0-15 are the VM contexts0-15 */
6366         /* invalidate the cache */
6367         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6368         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6369                                  WRITE_DATA_DST_SEL(0)));
6370         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6371         amdgpu_ring_write(ring, 0);
6372         amdgpu_ring_write(ring, 1 << vm_id);
6373
6374         /* wait for the invalidate to complete */
6375         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6376         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6377                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6378                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6379         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6380         amdgpu_ring_write(ring, 0);
6381         amdgpu_ring_write(ring, 0); /* ref */
6382         amdgpu_ring_write(ring, 0); /* mask */
6383         amdgpu_ring_write(ring, 0x20); /* poll interval */
6384
6385         /* compute doesn't have PFP */
6386         if (usepfp) {
6387                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6388                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6389                 amdgpu_ring_write(ring, 0x0);
6390         }
6391 }
6392
6393 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6394 {
6395         return ring->adev->wb.wb[ring->wptr_offs];
6396 }
6397
/*
 * Publish a new compute ring write pointer through the writeback slot
 * and ring the doorbell to notify the CP.
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
6406
6407 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6408                                            bool acquire)
6409 {
6410         struct amdgpu_device *adev = ring->adev;
6411         int pipe_num, tmp, reg;
6412         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6413
6414         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6415
6416         /* first me only has 2 entries, GFX and HP3D */
6417         if (ring->me > 0)
6418                 pipe_num -= 2;
6419
6420         reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6421         tmp = RREG32(reg);
6422         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6423         WREG32(reg, tmp);
6424 }
6425
6426 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6427                                             struct amdgpu_ring *ring,
6428                                             bool acquire)
6429 {
6430         int i, pipe;
6431         bool reserve;
6432         struct amdgpu_ring *iring;
6433
6434         mutex_lock(&adev->gfx.pipe_reserve_mutex);
6435         pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6436         if (acquire)
6437                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6438         else
6439                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6440
6441         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6442                 /* Clear all reservations - everyone reacquires all resources */
6443                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6444                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6445                                                        true);
6446
6447                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6448                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6449                                                        true);
6450         } else {
6451                 /* Lower all pipes without a current reservation */
6452                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6453                         iring = &adev->gfx.gfx_ring[i];
6454                         pipe = amdgpu_gfx_queue_to_bit(adev,
6455                                                        iring->me,
6456                                                        iring->pipe,
6457                                                        0);
6458                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6459                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6460                 }
6461
6462                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6463                         iring = &adev->gfx.compute_ring[i];
6464                         pipe = amdgpu_gfx_queue_to_bit(adev,
6465                                                        iring->me,
6466                                                        iring->pipe,
6467                                                        0);
6468                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6469                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6470                 }
6471         }
6472
6473         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6474 }
6475
6476 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6477                                       struct amdgpu_ring *ring,
6478                                       bool acquire)
6479 {
6480         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6481         uint32_t queue_priority = acquire ? 0xf : 0x0;
6482
6483         mutex_lock(&adev->srbm_mutex);
6484         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6485
6486         WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6487         WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6488
6489         vi_srbm_select(adev, 0, 0, 0, 0);
6490         mutex_unlock(&adev->srbm_mutex);
6491 }
6492 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6493                                                enum amd_sched_priority priority)
6494 {
6495         struct amdgpu_device *adev = ring->adev;
6496         bool acquire = priority == AMD_SCHED_PRIORITY_HIGH_HW;
6497
6498         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6499                 return;
6500
6501         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6502         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6503 }
6504
/*
 * Emit a fence on a compute ring via RELEASE_MEM: flush TC/TCL1
 * caches, write @seq to @addr (32 or 64 bit depending on @flags) and
 * optionally raise an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6525
/*
 * Emit a fence on the KIQ ring: plain WRITE_DATA of the 32-bit
 * sequence to memory, then (if requested) a write to CPC_INT_STATUS
 * to trigger the GENERIC2 interrupt.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6550
/* Emit a single SWITCH_BUFFER packet. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6556
6557 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6558 {
6559         uint32_t dw2 = 0;
6560
6561         if (amdgpu_sriov_vf(ring->adev))
6562                 gfx_v8_0_ring_emit_ce_meta(ring);
6563
6564         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6565         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6566                 gfx_v8_0_ring_emit_vgt_flush(ring);
6567                 /* set load_global_config & load_global_uconfig */
6568                 dw2 |= 0x8001;
6569                 /* set load_cs_sh_regs */
6570                 dw2 |= 0x01000000;
6571                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6572                 dw2 |= 0x10002;
6573
6574                 /* set load_ce_ram if preamble presented */
6575                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6576                         dw2 |= 0x10000000;
6577         } else {
6578                 /* still load_ce_ram if this is the first time preamble presented
6579                  * although there is no context switch happens.
6580                  */
6581                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6582                         dw2 |= 0x10000000;
6583         }
6584
6585         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6586         amdgpu_ring_write(ring, dw2);
6587         amdgpu_ring_write(ring, 0);
6588 }
6589
/*
 * Open a conditional-execution region: the dwords that follow are
 * skipped when *cond_exe_gpu_addr reads back 0.  Returns the ring
 * offset of the placeholder length dword, which must later be fixed up
 * by gfx_v8_0_ring_emit_patch_cond_exec().
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}
6602
/*
 * Patch the COND_EXEC placeholder emitted at @offset with the number of
 * dwords written since, accounting for ring-buffer wraparound.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		/* wptr wrapped past the end - add a full ring of dwords */
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6616
/*
 * Emit a COPY_DATA packet that copies register @reg into the writeback
 * slot reserved for virtualized register reads (virt.reg_val_offs).
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}
6632
/* Emit a WRITE_DATA packet that writes @val to register @reg. */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				  uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}
6642
/* Enable/disable the end-of-pipe timestamp interrupt on the GFX ring. */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6649
6650 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6651                                                      int me, int pipe,
6652                                                      enum amdgpu_interrupt_state state)
6653 {
6654         u32 mec_int_cntl, mec_int_cntl_reg;
6655
6656         /*
6657          * amdgpu controls only the first MEC. That's why this function only
6658          * handles the setting of interrupts for this specific MEC. All other
6659          * pipes' interrupts are set by amdkfd.
6660          */
6661
6662         if (me == 1) {
6663                 switch (pipe) {
6664                 case 0:
6665                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6666                         break;
6667                 case 1:
6668                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6669                         break;
6670                 case 2:
6671                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6672                         break;
6673                 case 3:
6674                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6675                         break;
6676                 default:
6677                         DRM_DEBUG("invalid pipe %d\n", pipe);
6678                         return;
6679                 }
6680         } else {
6681                 DRM_DEBUG("invalid me %d\n", me);
6682                 return;
6683         }
6684
6685         switch (state) {
6686         case AMDGPU_IRQ_STATE_DISABLE:
6687                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6688                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6689                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6690                 break;
6691         case AMDGPU_IRQ_STATE_ENABLE:
6692                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6693                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6694                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6695                 break;
6696         default:
6697                 break;
6698         }
6699 }
6700
/* Enable/disable the privileged-register-access fault interrupt. */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6711
/* Enable/disable the privileged-instruction fault interrupt. */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6722
6723 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6724                                             struct amdgpu_irq_src *src,
6725                                             unsigned type,
6726                                             enum amdgpu_interrupt_state state)
6727 {
6728         switch (type) {
6729         case AMDGPU_CP_IRQ_GFX_EOP:
6730                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6731                 break;
6732         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6733                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6734                 break;
6735         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6736                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6737                 break;
6738         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6739                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6740                 break;
6741         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6742                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6743                 break;
6744         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6745                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6746                 break;
6747         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6748                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6749                 break;
6750         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6751                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6752                 break;
6753         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6754                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6755                 break;
6756         default:
6757                 break;
6758         }
6759         return 0;
6760 }
6761
/*
 * EOP interrupt handler: decode the originating ring from the IV
 * ring_id and run fence processing on the matching ring.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	/* ring_id packs me (bits 2-3), pipe (bits 0-1) and queue (bits 4-6) */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		/* me 0 is the GFX ME; there is a single gfx ring */
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/*
			 * Per-queue interrupt is supported for MEC starting
			 * from VI.  The interrupt can only be enabled/disabled
			 * per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6793
/* Privileged-register fault handler: log and schedule a GPU reset. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6802
/* Privileged-instruction fault handler: log and schedule a GPU reset. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6811
/*
 * Enable/disable the KIQ's GENERIC2 interrupt, both in the global CPC
 * interrupt control and in the per-pipe control register for the MEC
 * pipe the KIQ ring lives on.
 */
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		/* per-pipe control register bank depends on the MEC (me) */
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
				     ring->pipe,
				     GENERIC2_INT_ENABLE,
				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}
6840
/*
 * KIQ GENERIC2 interrupt handler: decode the source for debugging and
 * run fence processing on the KIQ ring.
 */
static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	/* ring_id packs me (bits 2-3), pipe (bits 0-1) and queue (bits 4-6) */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		   me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}
6857
/* IP-level callbacks (init/fini, suspend/resume, reset, CG/PG) for GFX v8. */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6878
/*
 * Ring callbacks for the GFX (graphics) ring.  emit_frame_size is the
 * worst-case number of dwords a single frame submission may emit outside
 * the IBs themselves; the per-item comments below account for each packet
 * group and must be kept in sync with the corresponding emit_* helpers.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};
6922
/*
 * Ring callbacks for the compute (MEC) rings.  Shares rptr/test helpers
 * with the GFX ring but uses the compute wptr accessors and the compute
 * fence emitter; also the only ring type here exposing set_priority.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
};
6952
/*
 * Ring callbacks for the KIQ (kernel interface queue) ring.  The KIQ is a
 * compute-style queue driven only by the kernel, so it reuses the compute
 * wptr/ib helpers but emits its own fence type and additionally supports
 * register read/write over the ring (emit_rreg/emit_wreg).  No VM-flush /
 * GDS callbacks are wired up for this ring type.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6978
6979 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6980 {
6981         int i;
6982
6983         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6984
6985         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6986                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6987
6988         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6989                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6990 }
6991
/* End-of-pipe interrupts: signal fence completion on GFX/compute rings. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

/* Privileged register access faults raised by the CP. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

/* Privileged instruction faults raised by the CP. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

/* KIQ GENERIC2_INT interrupts (see gfx_v8_0_kiq_irq above). */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};
7011
7012 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7013 {
7014         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7015         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7016
7017         adev->gfx.priv_reg_irq.num_types = 1;
7018         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7019
7020         adev->gfx.priv_inst_irq.num_types = 1;
7021         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7022
7023         adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7024         adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
7025 }
7026
/* Install the RLC callback table; every ASIC handled by this file uses the
 * Iceland RLC callbacks unconditionally. */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7031
/*
 * Initialize the GDS (global data share) bookkeeping: total sizes and the
 * per-client (gfx vs. compute/CS) partition sizes.  GDS memory size is read
 * back from hardware; GWS/OA counts are fixed for gfx v8.  Partition sizes
 * differ for 64KB-GDS parts vs. everything else.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
7059
7060 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7061                                                  u32 bitmap)
7062 {
7063         u32 data;
7064
7065         if (!bitmap)
7066                 return;
7067
7068         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7069         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7070
7071         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7072 }
7073
7074 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7075 {
7076         u32 data, mask;
7077
7078         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7079                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7080
7081         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7082
7083         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7084 }
7085
/*
 * Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, the total active
 * CU count, and the "always on" (AO) CU mask.  Walks every shader engine /
 * shader array under grbm_idx_mutex because reading the CU bitmap requires
 * steering GRBM to one SE/SH at a time.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs cap always-on CUs at 2; dGPUs allow a whole SH's worth. */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	/* Parse the amdgpu.disable_cu module parameter for up to 4 SEs x 2 SHs. */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* Steer GRBM at SE i / SH j before touching CU registers. */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* Count active CUs; the first ao_cu_num become AO CUs. */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* NOTE(review): ao_cu_mask only packs SE 0-1 / SH 0-1
			 * (8 bits each via i*16 + j*8), although disable masks
			 * above cover 4 SEs — confirm this asymmetry is
			 * intentional for gfx v8 parts. */
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* Restore broadcast steering before releasing the lock. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
7136
/* IP block descriptor for GFX 8.0 ASICs, registered by the SoC setup code. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7145
/* IP block descriptor for GFX 8.1 ASICs; shares the 8.0 callback table. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7154
7155 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7156 {
7157         uint64_t ce_payload_addr;
7158         int cnt_ce;
7159         union {
7160                 struct vi_ce_ib_state regular;
7161                 struct vi_ce_ib_state_chained_ib chained;
7162         } ce_payload = {};
7163
7164         if (ring->adev->virt.chained_ib_support) {
7165                 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
7166                                                   offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7167                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7168         } else {
7169                 ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
7170                                                   offsetof(struct vi_gfx_meta_data, ce_payload);
7171                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7172         }
7173
7174         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7175         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7176                                 WRITE_DATA_DST_SEL(8) |
7177                                 WR_CONFIRM) |
7178                                 WRITE_DATA_CACHE_POLICY(0));
7179         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7180         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7181         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7182 }
7183
7184 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7185 {
7186         uint64_t de_payload_addr, gds_addr, csa_addr;
7187         int cnt_de;
7188         union {
7189                 struct vi_de_ib_state regular;
7190                 struct vi_de_ib_state_chained_ib chained;
7191         } de_payload = {};
7192
7193         csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
7194         gds_addr = csa_addr + 4096;
7195         if (ring->adev->virt.chained_ib_support) {
7196                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7197                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7198                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7199                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7200         } else {
7201                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7202                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7203                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7204                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7205         }
7206
7207         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7208         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7209                                 WRITE_DATA_DST_SEL(8) |
7210                                 WR_CONFIRM) |
7211                                 WRITE_DATA_CACHE_POLICY(0));
7212         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7213         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7214         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7215 }