drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

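/*
 * The golden register tables below are triplets of { register offset,
 * AND mask, value } consumed by amdgpu_device_program_register_sequence():
 * the masked bits are cleared and the value is OR'ed in, while an AND mask
 * of 0xffffffff writes the value directly.
 */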
static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

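/**
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 *
 * @adev: amdgpu device pointer
 *
 * Applies the clockgating init and golden register sequences that match
 * adev->asic_type. A handful of Polaris10 boards (matched by PCI revision
 * and subsystem IDs) additionally receive board-specific I2C register writes.
 */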
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_mgcg_cgcg_init,
                                                        ARRAY_SIZE(iceland_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_iceland_a11,
                                                        ARRAY_SIZE(golden_settings_iceland_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_golden_common_all,
                                                        ARRAY_SIZE(iceland_golden_common_all));
                break;
        case CHIP_FIJI:
                amdgpu_device_program_register_sequence(adev,
                                                        fiji_mgcg_cgcg_init,
                                                        ARRAY_SIZE(fiji_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_fiji_a10,
                                                        ARRAY_SIZE(golden_settings_fiji_a10));
                amdgpu_device_program_register_sequence(adev,
                                                        fiji_golden_common_all,
                                                        ARRAY_SIZE(fiji_golden_common_all));
                break;
        case CHIP_TONGA:
                amdgpu_device_program_register_sequence(adev,
                                                        tonga_mgcg_cgcg_init,
                                                        ARRAY_SIZE(tonga_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_tonga_a11,
                                                        ARRAY_SIZE(golden_settings_tonga_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        tonga_golden_common_all,
                                                        ARRAY_SIZE(tonga_golden_common_all));
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_polaris11_a11,
                                                        ARRAY_SIZE(golden_settings_polaris11_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        polaris11_golden_common_all,
                                                        ARRAY_SIZE(polaris11_golden_common_all));
                break;
        case CHIP_POLARIS10:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_polaris10_a11,
                                                        ARRAY_SIZE(golden_settings_polaris10_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        polaris10_golden_common_all,
                                                        ARRAY_SIZE(polaris10_golden_common_all));
                WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
                if (adev->pdev->revision == 0xc7 &&
                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
                }
                break;
        case CHIP_CARRIZO:
                amdgpu_device_program_register_sequence(adev,
                                                        cz_mgcg_cgcg_init,
                                                        ARRAY_SIZE(cz_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        cz_golden_settings_a11,
                                                        ARRAY_SIZE(cz_golden_settings_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        cz_golden_common_all,
                                                        ARRAY_SIZE(cz_golden_common_all));
                break;
        case CHIP_STONEY:
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_mgcg_cgcg_init,
                                                        ARRAY_SIZE(stoney_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_golden_settings_a11,
                                                        ARRAY_SIZE(stoney_golden_settings_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_golden_common_all,
                                                        ARRAY_SIZE(stoney_golden_common_all));
                break;
        default:
                break;
        }
}

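/**
 * gfx_v8_0_scratch_init - initialize the CP scratch register bookkeeping
 *
 * @adev: amdgpu device pointer
 *
 * Sets up the eight scratch registers starting at mmSCRATCH_REG0 and marks
 * them all free in the bitmask used by amdgpu_gfx_scratch_get().
 */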
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
        adev->gfx.scratch.num_reg = 8;
        adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

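/**
 * gfx_v8_0_ring_test_ring - basic sanity test for a ring
 *
 * @ring: ring to test
 *
 * Writes 0xCAFEDEAD to a scratch register, emits a SET_UCONFIG_REG packet
 * that stores 0xDEADBEEF to the same register, and polls until the value
 * lands or adev->usec_timeout expires. Returns 0 on success, -EINVAL on
 * timeout, or the error from scratch/ring allocation.
 */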
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
                return r;
        }
        WREG32(scratch, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 3);
        if (r) {
                DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
                          ring->idx, r);
                amdgpu_gfx_scratch_free(adev, scratch);
                return r;
        }
        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);

        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                DRM_UDELAY(1);
        }
        if (i < adev->usec_timeout) {
                DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
                          ring->idx, i);
        } else {
                DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
                          ring->idx, scratch, tmp);
                r = -EINVAL;
        }
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}

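/**
 * gfx_v8_0_ring_test_ib - sanity test for indirect buffer execution
 *
 * @ring: ring to test
 * @timeout: fence wait timeout in jiffies
 *
 * Same scratch-register handshake as the ring test, but the write packet is
 * submitted through an IB and completion is observed via the returned fence.
 * Returns 0 on success, -ETIMEDOUT if the fence never signals, or a negative
 * error code on failure.
 */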
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;
        uint32_t scratch;
        uint32_t tmp = 0;
        long r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r) {
                DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
                return r;
        }
        WREG32(scratch, 0xCAFEDEAD);
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, 256, &ib);
        if (r) {
                DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
                goto err1;
        }
        ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
        ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
        ib.ptr[2] = 0xDEADBEEF;
        ib.length_dw = 3;

        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r)
                goto err2;

        r = dma_fence_wait_timeout(f, false, timeout);
        if (r == 0) {
                DRM_ERROR("amdgpu: IB test timed out.\n");
                r = -ETIMEDOUT;
                goto err2;
        } else if (r < 0) {
                DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
                goto err2;
        }
        tmp = RREG32(scratch);
        if (tmp == 0xDEADBEEF) {
                DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
                r = 0;
        } else {
                DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
                          scratch, tmp);
                r = -EINVAL;
        }
err2:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);
err1:
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}

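/**
 * gfx_v8_0_free_microcode - release the CP/RLC firmware images
 *
 * @adev: amdgpu device pointer
 *
 * Drops the references taken by gfx_v8_0_init_microcode() and frees the
 * RLC register-list scratch buffer. Stoney and Topaz never load MEC2
 * firmware, so its release is skipped there.
 */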
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
        release_firmware(adev->gfx.pfp_fw);
        adev->gfx.pfp_fw = NULL;
        release_firmware(adev->gfx.me_fw);
        adev->gfx.me_fw = NULL;
        release_firmware(adev->gfx.ce_fw);
        adev->gfx.ce_fw = NULL;
        release_firmware(adev->gfx.rlc_fw);
        adev->gfx.rlc_fw = NULL;
        release_firmware(adev->gfx.mec_fw);
        adev->gfx.mec_fw = NULL;
        if ((adev->asic_type != CHIP_STONEY) &&
            (adev->asic_type != CHIP_TOPAZ))
                release_firmware(adev->gfx.mec2_fw);
        adev->gfx.mec2_fw = NULL;

        kfree(adev->gfx.rlc.register_list_format);
}

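/**
 * gfx_v8_0_init_microcode - load and validate the GFX firmware images
 *
 * @adev: amdgpu device pointer
 *
 * Requests the PFP, ME, CE, RLC, MEC and (where present) MEC2 firmware for
 * the detected ASIC, preferring the "_2" variants on Polaris with a fallback
 * to the legacy names. Parses the firmware headers into adev->gfx and, when
 * firmware is loaded by the SMU, registers each image (plus the MEC jump
 * table) in adev->firmware. On failure everything loaded so far is released.
 */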
895 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
896 {
897         const char *chip_name;
898         char fw_name[30];
899         int err;
900         struct amdgpu_firmware_info *info = NULL;
901         const struct common_firmware_header *header = NULL;
902         const struct gfx_firmware_header_v1_0 *cp_hdr;
903         const struct rlc_firmware_header_v2_0 *rlc_hdr;
904         unsigned int *tmp = NULL, i;
905
906         DRM_DEBUG("\n");
907
908         switch (adev->asic_type) {
909         case CHIP_TOPAZ:
910                 chip_name = "topaz";
911                 break;
912         case CHIP_TONGA:
913                 chip_name = "tonga";
914                 break;
915         case CHIP_CARRIZO:
916                 chip_name = "carrizo";
917                 break;
918         case CHIP_FIJI:
919                 chip_name = "fiji";
920                 break;
921         case CHIP_POLARIS11:
922                 chip_name = "polaris11";
923                 break;
924         case CHIP_POLARIS10:
925                 chip_name = "polaris10";
926                 break;
927         case CHIP_POLARIS12:
928                 chip_name = "polaris12";
929                 break;
930         case CHIP_STONEY:
931                 chip_name = "stoney";
932                 break;
933         default:
934                 BUG();
935         }
936
937         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
938                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
939                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
940                 if (err == -ENOENT) {
941                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
942                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
943                 }
944         } else {
945                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
946                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
947         }
948         if (err)
949                 goto out;
950         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
951         if (err)
952                 goto out;
953         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
954         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
955         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
956
957         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
958                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
959                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
960                 if (err == -ENOENT) {
961                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
962                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
963                 }
964         } else {
965                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
966                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
967         }
968         if (err)
969                 goto out;
970         err = amdgpu_ucode_validate(adev->gfx.me_fw);
971         if (err)
972                 goto out;
973         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
974         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
975
976         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
977
978         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
979                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
980                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
981                 if (err == -ENOENT) {
982                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
983                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
984                 }
985         } else {
986                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
987                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
988         }
989         if (err)
990                 goto out;
991         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
992         if (err)
993                 goto out;
994         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
995         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
996         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
997
998         /*
999          * Support for MCBP/Virtualization in combination with chained IBs is
1000          * formal released on feature version #46
1001          */
1002         if (adev->gfx.ce_feature_version >= 46 &&
1003             adev->gfx.pfp_feature_version >= 46) {
1004                 adev->virt.chained_ib_support = true;
1005                 DRM_INFO("Chained IB support enabled!\n");
1006         } else
1007                 adev->virt.chained_ib_support = false;
1008
1009         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1010         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1011         if (err)
1012                 goto out;
1013         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1014         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1015         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1016         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1017
1018         adev->gfx.rlc.save_and_restore_offset =
1019                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1020         adev->gfx.rlc.clear_state_descriptor_offset =
1021                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1022         adev->gfx.rlc.avail_scratch_ram_locations =
1023                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1024         adev->gfx.rlc.reg_restore_list_size =
1025                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1026         adev->gfx.rlc.reg_list_format_start =
1027                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1028         adev->gfx.rlc.reg_list_format_separate_start =
1029                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1030         adev->gfx.rlc.starting_offsets_start =
1031                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1032         adev->gfx.rlc.reg_list_format_size_bytes =
1033                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1034         adev->gfx.rlc.reg_list_size_bytes =
1035                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1036
1037         adev->gfx.rlc.register_list_format =
1038                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1039                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1040
1041         if (!adev->gfx.rlc.register_list_format) {
1042                 err = -ENOMEM;
1043                 goto out;
1044         }
1045
1046         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1047                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1048         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
1049                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1050
1051         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1052
1053         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1054                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1055         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1056                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1057
1058         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1059                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1060                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1061                 if (err == -ENOENT) {
1062                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1063                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1064                 }
1065         } else {
1066                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1067                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1068         }
1069         if (err)
1070                 goto out;
1071         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1072         if (err)
1073                 goto out;
1074         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1075         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1076         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1077
1078         if ((adev->asic_type != CHIP_STONEY) &&
1079             (adev->asic_type != CHIP_TOPAZ)) {
1080                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1081                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1082                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1083                         if (err == -ENOENT) {
1084                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1085                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1086                         }
1087                 } else {
1088                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1089                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1090                 }
1091                 if (!err) {
1092                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1093                         if (err)
1094                                 goto out;
1095                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1096                                 adev->gfx.mec2_fw->data;
1097                         adev->gfx.mec2_fw_version =
1098                                 le32_to_cpu(cp_hdr->header.ucode_version);
1099                         adev->gfx.mec2_feature_version =
1100                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1101                 } else {
1102                         err = 0;
1103                         adev->gfx.mec2_fw = NULL;
1104                 }
1105         }
1106
1107         if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) {
1108                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1109                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1110                 info->fw = adev->gfx.pfp_fw;
1111                 header = (const struct common_firmware_header *)info->fw->data;
1112                 adev->firmware.fw_size +=
1113                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1114
1115                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1116                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1117                 info->fw = adev->gfx.me_fw;
1118                 header = (const struct common_firmware_header *)info->fw->data;
1119                 adev->firmware.fw_size +=
1120                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1121
1122                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1123                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1124                 info->fw = adev->gfx.ce_fw;
1125                 header = (const struct common_firmware_header *)info->fw->data;
1126                 adev->firmware.fw_size +=
1127                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1128
1129                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1130                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1131                 info->fw = adev->gfx.rlc_fw;
1132                 header = (const struct common_firmware_header *)info->fw->data;
1133                 adev->firmware.fw_size +=
1134                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1135
1136                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1137                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1138                 info->fw = adev->gfx.mec_fw;
1139                 header = (const struct common_firmware_header *)info->fw->data;
1140                 adev->firmware.fw_size +=
1141                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1142
1143                 /* we also need to account for the CP jump table (JT) */
1144                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1145                 adev->firmware.fw_size +=
1146                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1147
1148                 if (amdgpu_sriov_vf(adev)) {
1149                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1150                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1151                         info->fw = adev->gfx.mec_fw;
1152                         adev->firmware.fw_size +=
1153                                 ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
1154                 }
1155
1156                 if (adev->gfx.mec2_fw) {
1157                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1158                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1159                         info->fw = adev->gfx.mec2_fw;
1160                         header = (const struct common_firmware_header *)info->fw->data;
1161                         adev->firmware.fw_size +=
1162                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1163                 }
1164
1165         }
1166
1167 out:
1168         if (err) {
1169                 dev_err(adev->dev,
1170                         "gfx8: Failed to load firmware \"%s\"\n",
1171                         fw_name);
1172                 release_firmware(adev->gfx.pfp_fw);
1173                 adev->gfx.pfp_fw = NULL;
1174                 release_firmware(adev->gfx.me_fw);
1175                 adev->gfx.me_fw = NULL;
1176                 release_firmware(adev->gfx.ce_fw);
1177                 adev->gfx.ce_fw = NULL;
1178                 release_firmware(adev->gfx.rlc_fw);
1179                 adev->gfx.rlc_fw = NULL;
1180                 release_firmware(adev->gfx.mec_fw);
1181                 adev->gfx.mec_fw = NULL;
1182                 release_firmware(adev->gfx.mec2_fw);
1183                 adev->gfx.mec2_fw = NULL;
1184         }
1185         return err;
1186 }
1187
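/*
 * Build the clear state buffer (CSB) as a stream of PM4 packets:
 * PREAMBLE begin, CONTEXT_CONTROL, one SET_CONTEXT_REG packet per
 * extent of the clearstate_vi context section, the per-chip raster
 * config pair, PREAMBLE end, and a final CLEAR_STATE packet.
 */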
1188 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1189                                     volatile u32 *buffer)
1190 {
1191         u32 count = 0, i;
1192         const struct cs_section_def *sect = NULL;
1193         const struct cs_extent_def *ext = NULL;
1194
1195         if (adev->gfx.rlc.cs_data == NULL)
1196                 return;
1197         if (buffer == NULL)
1198                 return;
1199
1200         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1201         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1202
1203         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1204         buffer[count++] = cpu_to_le32(0x80000000);
1205         buffer[count++] = cpu_to_le32(0x80000000);
1206
1207         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1208                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1209                         if (sect->id == SECT_CONTEXT) {
1210                                 buffer[count++] =
1211                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1212                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1213                                                 PACKET3_SET_CONTEXT_REG_START);
1214                                 for (i = 0; i < ext->reg_count; i++)
1215                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1216                         } else {
1217                                 return;
1218                         }
1219                 }
1220         }
1221
1222         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1223         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1224                         PACKET3_SET_CONTEXT_REG_START);
1225         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1226         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1227
1228         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1229         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1230
1231         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1232         buffer[count++] = cpu_to_le32(0);
1233 }
1234
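/*
 * Copy the CP jump tables out of the loaded microcode images into the
 * RLC cp_table BO, packed back to back: CE, PFP, ME, MEC and (on
 * Carrizo only) MEC2.
 */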
1235 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1236 {
1237         const __le32 *fw_data;
1238         volatile u32 *dst_ptr;
1239         int me, i, max_me = 4;
1240         u32 bo_offset = 0;
1241         u32 table_offset, table_size;
1242
1243         if (adev->asic_type == CHIP_CARRIZO)
1244                 max_me = 5;
1245
1246         /* write the cp table buffer */
1247         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1248         for (me = 0; me < max_me; me++) {
1249                 if (me == 0) {
1250                         const struct gfx_firmware_header_v1_0 *hdr =
1251                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1252                         fw_data = (const __le32 *)
1253                                 (adev->gfx.ce_fw->data +
1254                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1255                         table_offset = le32_to_cpu(hdr->jt_offset);
1256                         table_size = le32_to_cpu(hdr->jt_size);
1257                 } else if (me == 1) {
1258                         const struct gfx_firmware_header_v1_0 *hdr =
1259                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1260                         fw_data = (const __le32 *)
1261                                 (adev->gfx.pfp_fw->data +
1262                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1263                         table_offset = le32_to_cpu(hdr->jt_offset);
1264                         table_size = le32_to_cpu(hdr->jt_size);
1265                 } else if (me == 2) {
1266                         const struct gfx_firmware_header_v1_0 *hdr =
1267                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1268                         fw_data = (const __le32 *)
1269                                 (adev->gfx.me_fw->data +
1270                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1271                         table_offset = le32_to_cpu(hdr->jt_offset);
1272                         table_size = le32_to_cpu(hdr->jt_size);
1273                 } else if (me == 3) {
1274                         const struct gfx_firmware_header_v1_0 *hdr =
1275                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1276                         fw_data = (const __le32 *)
1277                                 (adev->gfx.mec_fw->data +
1278                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1279                         table_offset = le32_to_cpu(hdr->jt_offset);
1280                         table_size = le32_to_cpu(hdr->jt_size);
1281                 } else if (me == 4) {
1282                         const struct gfx_firmware_header_v1_0 *hdr =
1283                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1284                         fw_data = (const __le32 *)
1285                                 (adev->gfx.mec2_fw->data +
1286                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1287                         table_offset = le32_to_cpu(hdr->jt_offset);
1288                         table_size = le32_to_cpu(hdr->jt_size);
1289                 }
1290
1291                 for (i = 0; i < table_size; i++) {
1292                         dst_ptr[bo_offset + i] =
1293                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1294                 }
1295
1296                 bo_offset += table_size;
1297         }
1298 }
1299
1300 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1301 {
1302         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
1303         amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
1304 }
1305
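/*
 * Allocate and fill the RLC buffers: the clear state BO (sized by
 * gfx_v8_0_get_csb_size() and filled by gfx_v8_0_get_csb_buffer()),
 * plus, on Carrizo/Stoney, the CP table BO holding the jump tables
 * and the GDS backup area.
 */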
1306 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1307 {
1308         volatile u32 *dst_ptr;
1309         u32 dws;
1310         const struct cs_section_def *cs_data;
1311         int r;
1312
1313         adev->gfx.rlc.cs_data = vi_cs_data;
1314
1315         cs_data = adev->gfx.rlc.cs_data;
1316
1317         if (cs_data) {
1318                 /* clear state block */
1319                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1320
1321                 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1322                                               AMDGPU_GEM_DOMAIN_VRAM,
1323                                               &adev->gfx.rlc.clear_state_obj,
1324                                               &adev->gfx.rlc.clear_state_gpu_addr,
1325                                               (void **)&adev->gfx.rlc.cs_ptr);
1326                 if (r) {
1327                         dev_warn(adev->dev, "(%d) create RLC clear state bo failed\n", r);
1328                         gfx_v8_0_rlc_fini(adev);
1329                         return r;
1330                 }
1331
1332                 /* set up the cs buffer */
1333                 dst_ptr = adev->gfx.rlc.cs_ptr;
1334                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1335                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1336                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1337         }
1338
1339         if ((adev->asic_type == CHIP_CARRIZO) ||
1340             (adev->asic_type == CHIP_STONEY)) {
1341                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1342                 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1343                                               PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1344                                               &adev->gfx.rlc.cp_table_obj,
1345                                               &adev->gfx.rlc.cp_table_gpu_addr,
1346                                               (void **)&adev->gfx.rlc.cp_table_ptr);
1347                 if (r) {
1348                         dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1349                         return r;
1350                 }
1351
1352                 cz_init_cp_jump_table(adev);
1353
1354                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1355                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1356         }
1357
1358         return 0;
1359 }
1360
1361 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1362 {
1363         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1364 }
1365
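/*
 * Take ownership of the compute queues this driver will use and
 * allocate one GFX8_MEC_HPD_SIZE HPD/EOP slot per compute ring in a
 * single zero-initialized GTT BO.
 */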
1366 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1367 {
1368         int r;
1369         u32 *hpd;
1370         size_t mec_hpd_size;
1371
1372         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1373
1374         /* take ownership of the relevant compute queues */
1375         amdgpu_gfx_compute_queue_acquire(adev);
1376
1377         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1378
1379         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1380                                       AMDGPU_GEM_DOMAIN_GTT,
1381                                       &adev->gfx.mec.hpd_eop_obj,
1382                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1383                                       (void **)&hpd);
1384         if (r) {
1385                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1386                 return r;
1387         }
1388
1389         memset(hpd, 0, mec_hpd_size);
1390
1391         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1392         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1393
1394         return 0;
1395 }
1396
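/*
 * Hand-assembled GCN3 compute shaders used by the EDC GPR workaround
 * below.  The VGPR variant appears to be a run of v_mov_b32 writes
 * touching every VGPR bank and the SGPR variant a run of scalar moves,
 * each ending in s_barrier/s_endpgm; the encodings are kept as raw
 * dwords since no assembler runs at build time.
 */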
1397 static const u32 vgpr_init_compute_shader[] =
1398 {
1399         0x7e000209, 0x7e020208,
1400         0x7e040207, 0x7e060206,
1401         0x7e080205, 0x7e0a0204,
1402         0x7e0c0203, 0x7e0e0202,
1403         0x7e100201, 0x7e120200,
1404         0x7e140209, 0x7e160208,
1405         0x7e180207, 0x7e1a0206,
1406         0x7e1c0205, 0x7e1e0204,
1407         0x7e200203, 0x7e220202,
1408         0x7e240201, 0x7e260200,
1409         0x7e280209, 0x7e2a0208,
1410         0x7e2c0207, 0x7e2e0206,
1411         0x7e300205, 0x7e320204,
1412         0x7e340203, 0x7e360202,
1413         0x7e380201, 0x7e3a0200,
1414         0x7e3c0209, 0x7e3e0208,
1415         0x7e400207, 0x7e420206,
1416         0x7e440205, 0x7e460204,
1417         0x7e480203, 0x7e4a0202,
1418         0x7e4c0201, 0x7e4e0200,
1419         0x7e500209, 0x7e520208,
1420         0x7e540207, 0x7e560206,
1421         0x7e580205, 0x7e5a0204,
1422         0x7e5c0203, 0x7e5e0202,
1423         0x7e600201, 0x7e620200,
1424         0x7e640209, 0x7e660208,
1425         0x7e680207, 0x7e6a0206,
1426         0x7e6c0205, 0x7e6e0204,
1427         0x7e700203, 0x7e720202,
1428         0x7e740201, 0x7e760200,
1429         0x7e780209, 0x7e7a0208,
1430         0x7e7c0207, 0x7e7e0206,
1431         0xbf8a0000, 0xbf810000,
1432 };
1433
1434 static const u32 sgpr_init_compute_shader[] =
1435 {
1436         0xbe8a0100, 0xbe8c0102,
1437         0xbe8e0104, 0xbe900106,
1438         0xbe920108, 0xbe940100,
1439         0xbe960102, 0xbe980104,
1440         0xbe9a0106, 0xbe9c0108,
1441         0xbe9e0100, 0xbea00102,
1442         0xbea20104, 0xbea40106,
1443         0xbea60108, 0xbea80100,
1444         0xbeaa0102, 0xbeac0104,
1445         0xbeae0106, 0xbeb00108,
1446         0xbeb20100, 0xbeb40102,
1447         0xbeb60104, 0xbeb80106,
1448         0xbeba0108, 0xbebc0100,
1449         0xbebe0102, 0xbec00104,
1450         0xbec20106, 0xbec40108,
1451         0xbec60100, 0xbec80102,
1452         0xbee60004, 0xbee70005,
1453         0xbeea0006, 0xbeeb0007,
1454         0xbee80008, 0xbee90009,
1455         0xbefc0000, 0xbf8a0000,
1456         0xbf810000, 0x00000000,
1457 };
1458
1459 static const u32 vgpr_init_regs[] =
1460 {
1461         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1462         mmCOMPUTE_RESOURCE_LIMITS, 0,
1463         mmCOMPUTE_NUM_THREAD_X, 256*4,
1464         mmCOMPUTE_NUM_THREAD_Y, 1,
1465         mmCOMPUTE_NUM_THREAD_Z, 1,
1466         mmCOMPUTE_PGM_RSRC2, 20,
1467         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1468         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1469         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1470         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1471         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1472         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1473         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1474         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1475         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1476         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1477 };
1478
1479 static const u32 sgpr1_init_regs[] =
1480 {
1481         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1482         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1483         mmCOMPUTE_NUM_THREAD_X, 256*5,
1484         mmCOMPUTE_NUM_THREAD_Y, 1,
1485         mmCOMPUTE_NUM_THREAD_Z, 1,
1486         mmCOMPUTE_PGM_RSRC2, 20,
1487         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1488         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1489         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1490         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1491         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1492         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1493         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1494         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1495         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1496         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1497 };
1498
1499 static const u32 sgpr2_init_regs[] =
1500 {
1501         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1502         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1503         mmCOMPUTE_NUM_THREAD_X, 256*5,
1504         mmCOMPUTE_NUM_THREAD_Y, 1,
1505         mmCOMPUTE_NUM_THREAD_Z, 1,
1506         mmCOMPUTE_PGM_RSRC2, 20,
1507         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1508         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1509         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1510         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1511         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1512         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1513         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1514         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1515         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1516         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1517 };
1518
1519 static const u32 sec_ded_counter_registers[] =
1520 {
1521         mmCPC_EDC_ATC_CNT,
1522         mmCPC_EDC_SCRATCH_CNT,
1523         mmCPC_EDC_UCODE_CNT,
1524         mmCPF_EDC_ATC_CNT,
1525         mmCPF_EDC_ROQ_CNT,
1526         mmCPF_EDC_TAG_CNT,
1527         mmCPG_EDC_ATC_CNT,
1528         mmCPG_EDC_DMA_CNT,
1529         mmCPG_EDC_TAG_CNT,
1530         mmDC_EDC_CSINVOC_CNT,
1531         mmDC_EDC_RESTORE_CNT,
1532         mmDC_EDC_STATE_CNT,
1533         mmGDS_EDC_CNT,
1534         mmGDS_EDC_GRBM_CNT,
1535         mmGDS_EDC_OA_DED,
1536         mmSPI_EDC_CNT,
1537         mmSQC_ATC_EDC_GATCL1_CNT,
1538         mmSQC_EDC_CNT,
1539         mmSQ_EDC_DED_CNT,
1540         mmSQ_EDC_INFO,
1541         mmSQ_EDC_SEC_CNT,
1542         mmTCC_EDC_CNT,
1543         mmTCP_ATC_EDC_GATCL1_CNT,
1544         mmTCP_EDC_CNT,
1545         mmTD_EDC_CNT
1546 };
1547
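/*
 * EDC GPR workaround (Carrizo only): build a single IB that performs
 * three dispatches - one with the VGPR init shader and two with the
 * SGPR init shader using complementary SE0 CU masks - so every GPR
 * bank gets written once, then enable the DED/FED EDC modes and clear
 * the SEC/DED counters by reading them back.
 */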
1548 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1549 {
1550         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1551         struct amdgpu_ib ib;
1552         struct dma_fence *f = NULL;
1553         int r, i;
1554         u32 tmp;
1555         unsigned total_size, vgpr_offset, sgpr_offset;
1556         u64 gpu_addr;
1557
1558         /* only supported on CZ */
1559         if (adev->asic_type != CHIP_CARRIZO)
1560                 return 0;
1561
1562         /* bail if the compute ring is not ready */
1563         if (!ring->ready)
1564                 return 0;
1565
1566         tmp = RREG32(mmGB_EDC_MODE);
1567         WREG32(mmGB_EDC_MODE, 0);
1568
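        /*
         * IB size estimate per dispatch: 3 dwords for each SET_SH_REG
         * register/value pair, +4 for the COMPUTE_PGM_LO/HI write, +5
         * for DISPATCH_DIRECT and +2 for the EVENT_WRITE flush, times
         * 4 bytes per dword.
         */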
1569         total_size =
1570                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1571         total_size +=
1572                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1573         total_size +=
1574                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1575         total_size = ALIGN(total_size, 256);
1576         vgpr_offset = total_size;
1577         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1578         sgpr_offset = total_size;
1579         total_size += sizeof(sgpr_init_compute_shader);
1580
1581         /* allocate an indirect buffer to put the commands in */
1582         memset(&ib, 0, sizeof(ib));
1583         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1584         if (r) {
1585                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1586                 return r;
1587         }
1588
1589         /* load the compute shaders */
1590         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1591                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1592
1593         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1594                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1595
1596         /* init the ib length to 0 */
1597         ib.length_dw = 0;
1598
1599         /* VGPR */
1600         /* write the register state for the compute dispatch */
1601         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1602                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1603                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1604                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1605         }
1606         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1607         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1608         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1609         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1610         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1611         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1612
1613         /* write dispatch packet */
1614         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1615         ib.ptr[ib.length_dw++] = 8; /* x */
1616         ib.ptr[ib.length_dw++] = 1; /* y */
1617         ib.ptr[ib.length_dw++] = 1; /* z */
1618         ib.ptr[ib.length_dw++] =
1619                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1620
1621         /* write CS partial flush packet */
1622         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1623         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1624
1625         /* SGPR1 */
1626         /* write the register state for the compute dispatch */
1627         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1628                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1629                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1630                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1631         }
1632         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1633         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1634         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1635         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1636         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1637         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1638
1639         /* write dispatch packet */
1640         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1641         ib.ptr[ib.length_dw++] = 8; /* x */
1642         ib.ptr[ib.length_dw++] = 1; /* y */
1643         ib.ptr[ib.length_dw++] = 1; /* z */
1644         ib.ptr[ib.length_dw++] =
1645                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1646
1647         /* write CS partial flush packet */
1648         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1649         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1650
1651         /* SGPR2 */
1652         /* write the register state for the compute dispatch */
1653         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1654                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1655                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1656                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1657         }
1658         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1659         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1660         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1661         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1662         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1663         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1664
1665         /* write dispatch packet */
1666         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1667         ib.ptr[ib.length_dw++] = 8; /* x */
1668         ib.ptr[ib.length_dw++] = 1; /* y */
1669         ib.ptr[ib.length_dw++] = 1; /* z */
1670         ib.ptr[ib.length_dw++] =
1671                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1672
1673         /* write CS partial flush packet */
1674         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1675         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1676
1677         /* schedule the ib on the ring */
1678         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1679         if (r) {
1680                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1681                 goto fail;
1682         }
1683
1684         /* wait for the GPU to finish processing the IB */
1685         r = dma_fence_wait(f, false);
1686         if (r) {
1687                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1688                 goto fail;
1689         }
1690
1691         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1692         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1693         WREG32(mmGB_EDC_MODE, tmp);
1694
1695         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1696         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1697         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1698
1700         /* read back registers to clear the counters */
1701         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1702                 RREG32(sec_ded_counter_registers[i]);
1703
1704 fail:
1705         amdgpu_ib_free(adev, &ib, NULL);
1706         dma_fence_put(f);
1707
1708         return r;
1709 }
1710
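/*
 * Set up the per-ASIC gfx config limits and the golden GB_ADDR_CONFIG
 * value (queried from atombios on Polaris).  The memory row size is
 * derived from the DIMM fuse address mapping on APUs, or from the
 * MC_ARB_RAMCFG NOOFCOLS field on dGPUs, and patched into the
 * ROW_SIZE field of gb_addr_config.
 */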
1711 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1712 {
1713         u32 gb_addr_config;
1714         u32 mc_shared_chmap, mc_arb_ramcfg;
1715         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1716         u32 tmp;
1717         int ret;
1718
1719         switch (adev->asic_type) {
1720         case CHIP_TOPAZ:
1721                 adev->gfx.config.max_shader_engines = 1;
1722                 adev->gfx.config.max_tile_pipes = 2;
1723                 adev->gfx.config.max_cu_per_sh = 6;
1724                 adev->gfx.config.max_sh_per_se = 1;
1725                 adev->gfx.config.max_backends_per_se = 2;
1726                 adev->gfx.config.max_texture_channel_caches = 2;
1727                 adev->gfx.config.max_gprs = 256;
1728                 adev->gfx.config.max_gs_threads = 32;
1729                 adev->gfx.config.max_hw_contexts = 8;
1730
1731                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1732                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1733                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1734                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1735                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1736                 break;
1737         case CHIP_FIJI:
1738                 adev->gfx.config.max_shader_engines = 4;
1739                 adev->gfx.config.max_tile_pipes = 16;
1740                 adev->gfx.config.max_cu_per_sh = 16;
1741                 adev->gfx.config.max_sh_per_se = 1;
1742                 adev->gfx.config.max_backends_per_se = 4;
1743                 adev->gfx.config.max_texture_channel_caches = 16;
1744                 adev->gfx.config.max_gprs = 256;
1745                 adev->gfx.config.max_gs_threads = 32;
1746                 adev->gfx.config.max_hw_contexts = 8;
1747
1748                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1749                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1750                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1751                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1752                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1753                 break;
1754         case CHIP_POLARIS11:
1755         case CHIP_POLARIS12:
1756                 ret = amdgpu_atombios_get_gfx_info(adev);
1757                 if (ret)
1758                         return ret;
1759                 adev->gfx.config.max_gprs = 256;
1760                 adev->gfx.config.max_gs_threads = 32;
1761                 adev->gfx.config.max_hw_contexts = 8;
1762
1763                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1764                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1765                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1766                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1767                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1768                 break;
1769         case CHIP_POLARIS10:
1770                 ret = amdgpu_atombios_get_gfx_info(adev);
1771                 if (ret)
1772                         return ret;
1773                 adev->gfx.config.max_gprs = 256;
1774                 adev->gfx.config.max_gs_threads = 32;
1775                 adev->gfx.config.max_hw_contexts = 8;
1776
1777                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1778                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1779                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1780                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1781                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1782                 break;
1783         case CHIP_TONGA:
1784                 adev->gfx.config.max_shader_engines = 4;
1785                 adev->gfx.config.max_tile_pipes = 8;
1786                 adev->gfx.config.max_cu_per_sh = 8;
1787                 adev->gfx.config.max_sh_per_se = 1;
1788                 adev->gfx.config.max_backends_per_se = 2;
1789                 adev->gfx.config.max_texture_channel_caches = 8;
1790                 adev->gfx.config.max_gprs = 256;
1791                 adev->gfx.config.max_gs_threads = 32;
1792                 adev->gfx.config.max_hw_contexts = 8;
1793
1794                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1795                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1796                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1797                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1798                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1799                 break;
1800         case CHIP_CARRIZO:
1801                 adev->gfx.config.max_shader_engines = 1;
1802                 adev->gfx.config.max_tile_pipes = 2;
1803                 adev->gfx.config.max_sh_per_se = 1;
1804                 adev->gfx.config.max_backends_per_se = 2;
1805                 adev->gfx.config.max_cu_per_sh = 8;
1806                 adev->gfx.config.max_texture_channel_caches = 2;
1807                 adev->gfx.config.max_gprs = 256;
1808                 adev->gfx.config.max_gs_threads = 32;
1809                 adev->gfx.config.max_hw_contexts = 8;
1810
1811                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1812                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1813                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1814                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1815                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1816                 break;
1817         case CHIP_STONEY:
1818                 adev->gfx.config.max_shader_engines = 1;
1819                 adev->gfx.config.max_tile_pipes = 2;
1820                 adev->gfx.config.max_sh_per_se = 1;
1821                 adev->gfx.config.max_backends_per_se = 1;
1822                 adev->gfx.config.max_cu_per_sh = 3;
1823                 adev->gfx.config.max_texture_channel_caches = 2;
1824                 adev->gfx.config.max_gprs = 256;
1825                 adev->gfx.config.max_gs_threads = 16;
1826                 adev->gfx.config.max_hw_contexts = 8;
1827
1828                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1829                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1830                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1831                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1832                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1833                 break;
1834         default:
1835                 adev->gfx.config.max_shader_engines = 2;
1836                 adev->gfx.config.max_tile_pipes = 4;
1837                 adev->gfx.config.max_cu_per_sh = 2;
1838                 adev->gfx.config.max_sh_per_se = 1;
1839                 adev->gfx.config.max_backends_per_se = 2;
1840                 adev->gfx.config.max_texture_channel_caches = 4;
1841                 adev->gfx.config.max_gprs = 256;
1842                 adev->gfx.config.max_gs_threads = 32;
1843                 adev->gfx.config.max_hw_contexts = 8;
1844
1845                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1846                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1847                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1848                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1849                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1850                 break;
1851         }
1852
1853         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1854         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1855         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1856
1857         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1858         adev->gfx.config.mem_max_burst_length_bytes = 256;
1859         if (adev->flags & AMD_IS_APU) {
1860                 /* Get memory bank mapping mode. */
1861                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1862                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1863                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1864
1865                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1866                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1867                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1868
1869                 /* Validate settings in case only one DIMM is installed. */
1870                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1871                         dimm00_addr_map = 0;
1872                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1873                         dimm01_addr_map = 0;
1874                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1875                         dimm10_addr_map = 0;
1876                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1877                         dimm11_addr_map = 0;
1878
1879                 /* If the DIMM address map is 8GB, the ROW size should be 2KB; otherwise 1KB. */
1880                 /* If ROW size(DIMM1) != ROW size(DIMM0), use the larger ROW size. */
1881                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1882                         adev->gfx.config.mem_row_size_in_kb = 2;
1883                 else
1884                         adev->gfx.config.mem_row_size_in_kb = 1;
1885         } else {
1886                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1887                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1888                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1889                         adev->gfx.config.mem_row_size_in_kb = 4;
1890         }
1891
1892         adev->gfx.config.shader_engine_tile_size = 32;
1893         adev->gfx.config.num_gpus = 1;
1894         adev->gfx.config.multi_gpu_tile_size = 64;
1895
1896         /* fix up row size */
1897         switch (adev->gfx.config.mem_row_size_in_kb) {
1898         case 1:
1899         default:
1900                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1901                 break;
1902         case 2:
1903                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1904                 break;
1905         case 4:
1906                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1907                 break;
1908         }
1909         adev->gfx.config.gb_addr_config = gb_addr_config;
1910
1911         return 0;
1912 }
1913
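/*
 * Initialize one compute ring for the given (mec, pipe, queue) triple.
 * MEC numbering is offset by one ("mec0 is me1"); the ring's EOP area
 * and doorbell are carved out of the shared HPD BO and the MEC
 * doorbell range by ring_id.
 */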
1914 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1915                                         int mec, int pipe, int queue)
1916 {
1917         int r;
1918         unsigned irq_type;
1919         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1920
1923         /* mec0 is me1 */
1924         ring->me = mec + 1;
1925         ring->pipe = pipe;
1926         ring->queue = queue;
1927
1928         ring->ring_obj = NULL;
1929         ring->use_doorbell = true;
1930         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
1931         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1932                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1933         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1934
1935         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1936                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1937                 + ring->pipe;
1938
1939         /* type-2 packets are deprecated on MEC, use type-3 instead */
1940         r = amdgpu_ring_init(adev, ring, 1024,
1941                         &adev->gfx.eop_irq, irq_type);
1942         if (r)
1943                 return r;
1944
1946         return 0;
1947 }
1948
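/*
 * sw_init ordering: register the legacy-IH interrupt sources (178 KIQ,
 * 181 EOP, 184/185 privileged reg/instruction), load microcode, set up
 * the RLC and MEC BOs, create the gfx ring(s), spread the compute
 * rings horizontally across pipes, then bring up the KIQ, the MQDs
 * and the GDS/GWS/OA reservations.
 */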
1949 static int gfx_v8_0_sw_init(void *handle)
1950 {
1951         int i, j, k, r, ring_id;
1952         struct amdgpu_ring *ring;
1953         struct amdgpu_kiq *kiq;
1954         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1955
1956         switch (adev->asic_type) {
1957         case CHIP_FIJI:
1958         case CHIP_TONGA:
1959         case CHIP_POLARIS11:
1960         case CHIP_POLARIS12:
1961         case CHIP_POLARIS10:
1962         case CHIP_CARRIZO:
1963                 adev->gfx.mec.num_mec = 2;
1964                 break;
1965         case CHIP_TOPAZ:
1966         case CHIP_STONEY:
1967         default:
1968                 adev->gfx.mec.num_mec = 1;
1969                 break;
1970         }
1971
1972         adev->gfx.mec.num_pipe_per_mec = 4;
1973         adev->gfx.mec.num_queue_per_pipe = 8;
1974
1975         /* KIQ event */
1976         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
1977         if (r)
1978                 return r;
1979
1980         /* EOP Event */
1981         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
1982         if (r)
1983                 return r;
1984
1985         /* Privileged reg */
1986         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
1987                               &adev->gfx.priv_reg_irq);
1988         if (r)
1989                 return r;
1990
1991         /* Privileged inst */
1992         r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
1993                               &adev->gfx.priv_inst_irq);
1994         if (r)
1995                 return r;
1996
1997         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1998
1999         gfx_v8_0_scratch_init(adev);
2000
2001         r = gfx_v8_0_init_microcode(adev);
2002         if (r) {
2003                 DRM_ERROR("Failed to load gfx firmware!\n");
2004                 return r;
2005         }
2006
2007         r = gfx_v8_0_rlc_init(adev);
2008         if (r) {
2009                 DRM_ERROR("Failed to init rlc BOs!\n");
2010                 return r;
2011         }
2012
2013         r = gfx_v8_0_mec_init(adev);
2014         if (r) {
2015                 DRM_ERROR("Failed to init MEC BOs!\n");
2016                 return r;
2017         }
2018
2019         /* set up the gfx ring */
2020         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2021                 ring = &adev->gfx.gfx_ring[i];
2022                 ring->ring_obj = NULL;
2023                 sprintf(ring->name, "gfx");
2024                 /* no gfx doorbells on iceland */
2025                 if (adev->asic_type != CHIP_TOPAZ) {
2026                         ring->use_doorbell = true;
2027                         ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2028                 }
2029
2030                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2031                                      AMDGPU_CP_IRQ_GFX_EOP);
2032                 if (r)
2033                         return r;
2034         }
2035
2037         /* set up the compute queues - allocate horizontally across pipes */
2038         ring_id = 0;
2039         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2040                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2041                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2042                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2043                                         continue;
2044
2045                                 r = gfx_v8_0_compute_ring_init(adev,
2046                                                                 ring_id,
2047                                                                 i, k, j);
2048                                 if (r)
2049                                         return r;
2050
2051                                 ring_id++;
2052                         }
2053                 }
2054         }
2055
2056         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2057         if (r) {
2058                 DRM_ERROR("Failed to init KIQ BOs!\n");
2059                 return r;
2060         }
2061
2062         kiq = &adev->gfx.kiq;
2063         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2064         if (r)
2065                 return r;
2066
2067         /* create MQDs for all compute queues, as well as the KIQ for the SR-IOV case */
2068         r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2069         if (r)
2070                 return r;
2071
2072         /* reserve GDS, GWS and OA resource for gfx */
2073         r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2074                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2075                                     &adev->gds.gds_gfx_bo, NULL, NULL);
2076         if (r)
2077                 return r;
2078
2079         r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2080                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2081                                     &adev->gds.gws_gfx_bo, NULL, NULL);
2082         if (r)
2083                 return r;
2084
2085         r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2086                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2087                                     &adev->gds.oa_gfx_bo, NULL, NULL);
2088         if (r)
2089                 return r;
2090
2091         adev->gfx.ce_ram_size = 0x8000;
2092
2093         r = gfx_v8_0_gpu_early_init(adev);
2094         if (r)
2095                 return r;
2096
2097         return 0;
2098 }
2099
2100 static int gfx_v8_0_sw_fini(void *handle)
2101 {
2102         int i;
2103         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2104
2105         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2106         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2107         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2108
2109         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2110                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2111         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2112                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2113
2114         amdgpu_gfx_compute_mqd_sw_fini(adev);
2115         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2116         amdgpu_gfx_kiq_fini(adev);
2117
2118         gfx_v8_0_mec_fini(adev);
2119         gfx_v8_0_rlc_fini(adev);
2120         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2121                                 &adev->gfx.rlc.clear_state_gpu_addr,
2122                                 (void **)&adev->gfx.rlc.cs_ptr);
2123         if ((adev->asic_type == CHIP_CARRIZO) ||
2124             (adev->asic_type == CHIP_STONEY)) {
2125                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2126                                 &adev->gfx.rlc.cp_table_gpu_addr,
2127                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2128         }
2129         gfx_v8_0_free_microcode(adev);
2130
2131         return 0;
2132 }
2133
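/*
 * Program the per-ASIC GB_TILE_MODE0-31 and GB_MACROTILE_MODE0-15
 * tables and keep a copy in adev->gfx.config for the UMDs.  A few
 * reserved indices (e.g. 7, 12, 17 and 23 on Topaz) are left
 * unwritten.
 */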
2134 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2135 {
2136         uint32_t *modearray, *mod2array;
2137         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2138         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2139         u32 reg_offset;
2140
2141         modearray = adev->gfx.config.tile_mode_array;
2142         mod2array = adev->gfx.config.macrotile_mode_array;
2143
2144         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2145                 modearray[reg_offset] = 0;
2146
2147         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2148                 mod2array[reg_offset] = 0;
2149
2150         switch (adev->asic_type) {
2151         case CHIP_TOPAZ:
2152                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153                                 PIPE_CONFIG(ADDR_SURF_P2) |
2154                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2155                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2156                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2157                                 PIPE_CONFIG(ADDR_SURF_P2) |
2158                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2159                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2160                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2161                                 PIPE_CONFIG(ADDR_SURF_P2) |
2162                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2163                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2164                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2165                                 PIPE_CONFIG(ADDR_SURF_P2) |
2166                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2167                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2168                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2169                                 PIPE_CONFIG(ADDR_SURF_P2) |
2170                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2171                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2172                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2173                                 PIPE_CONFIG(ADDR_SURF_P2) |
2174                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2175                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2176                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2177                                 PIPE_CONFIG(ADDR_SURF_P2) |
2178                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2179                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2180                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2181                                 PIPE_CONFIG(ADDR_SURF_P2));
2182                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2183                                 PIPE_CONFIG(ADDR_SURF_P2) |
2184                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2185                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2186                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2187                                  PIPE_CONFIG(ADDR_SURF_P2) |
2188                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2189                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2190                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2191                                  PIPE_CONFIG(ADDR_SURF_P2) |
2192                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2193                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2194                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2195                                  PIPE_CONFIG(ADDR_SURF_P2) |
2196                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2197                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2198                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2199                                  PIPE_CONFIG(ADDR_SURF_P2) |
2200                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2201                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2202                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2203                                  PIPE_CONFIG(ADDR_SURF_P2) |
2204                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2205                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2206                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2207                                  PIPE_CONFIG(ADDR_SURF_P2) |
2208                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2209                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2210                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2211                                  PIPE_CONFIG(ADDR_SURF_P2) |
2212                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2213                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2214                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2215                                  PIPE_CONFIG(ADDR_SURF_P2) |
2216                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2217                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2218                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2219                                  PIPE_CONFIG(ADDR_SURF_P2) |
2220                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2221                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2222                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2223                                  PIPE_CONFIG(ADDR_SURF_P2) |
2224                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2225                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2226                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2227                                  PIPE_CONFIG(ADDR_SURF_P2) |
2228                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2229                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2230                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2231                                  PIPE_CONFIG(ADDR_SURF_P2) |
2232                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2233                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2234                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2235                                  PIPE_CONFIG(ADDR_SURF_P2) |
2236                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2237                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2238                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2239                                  PIPE_CONFIG(ADDR_SURF_P2) |
2240                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2241                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2242                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2243                                  PIPE_CONFIG(ADDR_SURF_P2) |
2244                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2245                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2246                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2247                                  PIPE_CONFIG(ADDR_SURF_P2) |
2248                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2249                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2250                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2251                                  PIPE_CONFIG(ADDR_SURF_P2) |
2252                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2253                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2254
2255                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2256                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2257                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258                                 NUM_BANKS(ADDR_SURF_8_BANK));
2259                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2260                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2261                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2262                                 NUM_BANKS(ADDR_SURF_8_BANK));
2263                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2264                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2265                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2266                                 NUM_BANKS(ADDR_SURF_8_BANK));
2267                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2269                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2270                                 NUM_BANKS(ADDR_SURF_8_BANK));
2271                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2272                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2273                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2274                                 NUM_BANKS(ADDR_SURF_8_BANK));
2275                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2276                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2277                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2278                                 NUM_BANKS(ADDR_SURF_8_BANK));
2279                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2280                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2281                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2282                                 NUM_BANKS(ADDR_SURF_8_BANK));
2283                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2284                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2285                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2286                                 NUM_BANKS(ADDR_SURF_16_BANK));
2287                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2288                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2289                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2290                                 NUM_BANKS(ADDR_SURF_16_BANK));
2291                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2292                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2293                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294                                  NUM_BANKS(ADDR_SURF_16_BANK));
2295                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2296                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2297                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2298                                  NUM_BANKS(ADDR_SURF_16_BANK));
2299                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2300                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2301                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2302                                  NUM_BANKS(ADDR_SURF_16_BANK));
2303                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2305                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2306                                  NUM_BANKS(ADDR_SURF_16_BANK));
2307                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2308                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2309                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2310                                  NUM_BANKS(ADDR_SURF_8_BANK));
2311
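                     /* tile modes 7, 12, 17 and 23 and macrotile mode 7 are not programmed on this chip */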
2312                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2313                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2314                             reg_offset != 23)
2315                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2316
2317                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2318                         if (reg_offset != 7)
2319                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2320
2321                 break;
2322         case CHIP_FIJI:
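                     /* Fiji: 16-pipe (ADDR_SURF_P16_32x32_16x16) pipe config */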
2323                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2326                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2327                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2330                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2331                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2332                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2334                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2338                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2339                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2342                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2343                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2344                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2345                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2346                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2347                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2348                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2349                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2350                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2351                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2352                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2353                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2354                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2355                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2356                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2357                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2358                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2360                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2361                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2363                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2364                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2365                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2366                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2368                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2369                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2370                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2371                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2372                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2373                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2374                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2376                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2377                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2378                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2381                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2382                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2384                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2385                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2386                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2388                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2389                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2390                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2391                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2392                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2393                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2394                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2396                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2397                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2398                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2400                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2401                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2402                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2404                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2405                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2406                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2408                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2409                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2410                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2412                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2413                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2414                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2415                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2416                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2417                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2418                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2420                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2421                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2422                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2424                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2425                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2426                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2428                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2429                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2434                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2436                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2438                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2440                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2441                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2442                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2443                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2444                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2445
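                     /* bank parameters: 8 banks throughout except entry 14, which drops to 4 */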
2446                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2448                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2449                                 NUM_BANKS(ADDR_SURF_8_BANK));
2450                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2452                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453                                 NUM_BANKS(ADDR_SURF_8_BANK));
2454                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2456                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2457                                 NUM_BANKS(ADDR_SURF_8_BANK));
2458                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2460                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2461                                 NUM_BANKS(ADDR_SURF_8_BANK));
2462                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2464                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2465                                 NUM_BANKS(ADDR_SURF_8_BANK));
2466                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2468                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2469                                 NUM_BANKS(ADDR_SURF_8_BANK));
2470                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2472                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2473                                 NUM_BANKS(ADDR_SURF_8_BANK));
2474                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2476                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2477                                 NUM_BANKS(ADDR_SURF_8_BANK));
2478                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2480                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2481                                 NUM_BANKS(ADDR_SURF_8_BANK));
2482                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2484                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2485                                  NUM_BANKS(ADDR_SURF_8_BANK));
2486                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2488                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2489                                  NUM_BANKS(ADDR_SURF_8_BANK));
2490                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2492                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2493                                  NUM_BANKS(ADDR_SURF_8_BANK));
2494                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2496                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2497                                  NUM_BANKS(ADDR_SURF_8_BANK));
2498                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2501                                  NUM_BANKS(ADDR_SURF_4_BANK));
2502
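                     /* every tile mode is written here; only macrotile mode 7 is skipped */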
2503                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2504                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2505
2506                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2507                         if (reg_offset != 7)
2508                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2509
2510                 break;
2511         case CHIP_TONGA:
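                     /* Tonga: 8-pipe (ADDR_SURF_P8_32x32_16x16) pipe config */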
2512                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2514                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2515                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2516                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2518                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2519                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2520                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2523                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2524                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2527                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2528                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2531                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2532                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2533                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2535                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2537                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2538                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2539                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2540                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2541                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2542                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2543                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2544                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2545                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2546                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2547                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2549                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2550                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2551                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2553                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2554                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2555                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2557                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2558                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2559                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2560                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2561                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2562                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2563                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2564                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2565                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2566                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2567                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2568                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2569                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2570                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2571                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2572                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2573                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2574                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2575                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2577                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2578                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2579                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2580                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2581                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2582                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2583                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2585                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2586                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2587                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2588                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2589                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2590                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2591                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2593                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2594                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2595                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2597                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2598                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2599                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2601                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2602                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2603                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2604                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2605                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2606                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2607                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2608                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2609                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2610                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2611                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2613                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2614                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2615                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2616                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2617                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2618                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2619                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2620                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2621                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2623                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2625                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2626                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2627                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2628                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2629                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2630                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2631                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2632                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2633                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2634
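                     /* bank parameters: 16 banks, tapering to 8 (entry 12) and 4 (entries 13-14) */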
2635                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2637                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638                                 NUM_BANKS(ADDR_SURF_16_BANK));
2639                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2641                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2642                                 NUM_BANKS(ADDR_SURF_16_BANK));
2643                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2645                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2646                                 NUM_BANKS(ADDR_SURF_16_BANK));
2647                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2649                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2650                                 NUM_BANKS(ADDR_SURF_16_BANK));
2651                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2653                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2654                                 NUM_BANKS(ADDR_SURF_16_BANK));
2655                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2658                                 NUM_BANKS(ADDR_SURF_16_BANK));
2659                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2661                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2662                                 NUM_BANKS(ADDR_SURF_16_BANK));
2663                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2664                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2665                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2666                                 NUM_BANKS(ADDR_SURF_16_BANK));
2667                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2669                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2670                                 NUM_BANKS(ADDR_SURF_16_BANK));
2671                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2672                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2673                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2674                                  NUM_BANKS(ADDR_SURF_16_BANK));
2675                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2676                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2677                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2678                                  NUM_BANKS(ADDR_SURF_16_BANK));
2679                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2682                                  NUM_BANKS(ADDR_SURF_8_BANK));
2683                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2684                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2685                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2686                                  NUM_BANKS(ADDR_SURF_4_BANK));
2687                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2688                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2689                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2690                                  NUM_BANKS(ADDR_SURF_4_BANK));
2691
2692                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2693                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2694
2695                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2696                         if (reg_offset != 7)
2697                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2698
2699                 break;
2700         case CHIP_POLARIS11:
2701         case CHIP_POLARIS12:
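                     /* Polaris11 and Polaris12 share this 4-pipe (ADDR_SURF_P4_16x16) table */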
2702                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2703                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2705                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2706                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2707                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2708                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2709                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2710                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2711                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2712                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2713                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2714                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2715                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2716                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2717                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2718                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2719                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2720                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2721                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2722                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2723                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2724                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2725                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2726                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2727                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2728                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2729                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2730                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2731                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2732                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2733                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2734                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2735                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2736                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2737                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2738                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2739                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2740                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2741                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2742                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2743                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2744                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2745                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2746                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2747                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2748                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2749                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2750                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2751                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2752                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2753                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2754                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2755                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2756                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2757                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2758                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2759                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2760                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2761                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2762                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2763                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2764                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2765                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2766                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2767                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2768                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2769                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2770                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2771                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2772                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2773                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2774                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2775                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2776                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2777                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2778                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2779                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2780                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2781                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2782                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2783                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2784                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2785                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2786                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2787                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2788                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2789                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2790                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2791                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2792                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2793                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2794                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2795                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2796                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2797                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2798                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2799                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2800                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2801                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2802                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2803                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2804                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2805                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2806                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2807                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2808                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2809                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2810                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2811                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2812                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2813                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2814                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2815                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2816                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2817                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2818                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2819                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2820                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2821                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2822                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2823                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2824
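                     /* bank parameters; entries 8-9 widen to BANK_WIDTH_2, banks taper to 8 and 4 at entries 13-14 */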
2825                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2827                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2828                                 NUM_BANKS(ADDR_SURF_16_BANK));
2829
2830                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2831                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2832                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2833                                 NUM_BANKS(ADDR_SURF_16_BANK));
2834
2835                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2836                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2837                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2838                                 NUM_BANKS(ADDR_SURF_16_BANK));
2839
2840                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2842                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2843                                 NUM_BANKS(ADDR_SURF_16_BANK));
2844
2845                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2846                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2847                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2848                                 NUM_BANKS(ADDR_SURF_16_BANK));
2849
2850                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2851                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2852                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2853                                 NUM_BANKS(ADDR_SURF_16_BANK));
2854
2855                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2856                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2857                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2858                                 NUM_BANKS(ADDR_SURF_16_BANK));
2859
2860                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2861                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2862                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2863                                 NUM_BANKS(ADDR_SURF_16_BANK));
2864
2865                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2866                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2867                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2868                                 NUM_BANKS(ADDR_SURF_16_BANK));
2869
2870                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2871                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2872                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2873                                 NUM_BANKS(ADDR_SURF_16_BANK));
2874
2875                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2876                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2877                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2878                                 NUM_BANKS(ADDR_SURF_16_BANK));
2879
2880                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2882                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2883                                 NUM_BANKS(ADDR_SURF_16_BANK));
2884
2885                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2886                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2887                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2888                                 NUM_BANKS(ADDR_SURF_8_BANK));
2889
2890                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2891                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2892                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2893                                 NUM_BANKS(ADDR_SURF_4_BANK));
2894
2895                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2896                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2897
2898                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2899                         if (reg_offset != 7)
2900                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2901
2902                 break;
2903         case CHIP_POLARIS10:
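                     /* Polaris10: same 8-pipe (ADDR_SURF_P8_32x32_16x16) config as Tonga */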
2904                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2906                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2907                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2908                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2909                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2910                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2911                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2912                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2913                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2914                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2915                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2916                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2917                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2918                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2919                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2920                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2921                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2922                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2923                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2924                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2925                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2926                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2927                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2928                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2929                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2930                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2931                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2932                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2933                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2934                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2935                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2936                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2937                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2938                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2939                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2940                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2941                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2942                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2943                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2944                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2945                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2946                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2947                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2948                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2949                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2950                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2951                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2952                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2953                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2954                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2955                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2956                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2957                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2958                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2959                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2960                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2961                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2962                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2963                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2964                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2965                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2966                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2967                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2968                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2969                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2970                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2971                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2972                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2973                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2974                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2975                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2976                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2977                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2978                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2979                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2980                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2981                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2982                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2983                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2984                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2985                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2986                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2987                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2988                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2989                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2990                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2991                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2992                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2993                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2994                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2995                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2996                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2997                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2998                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2999                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3000                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3001                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3002                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3003                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3004                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3005                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3006                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3007                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3008                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3009                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3010                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3011                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3012                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3013                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3014                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3015                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3016                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3017                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3018                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3019                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3020                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3021                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3022                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3023                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3024                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3025                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3026
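                     /* bank parameters: 16 banks down to 8 (entry 12) and 4 (entries 13-14) */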
3027                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3028                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3029                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3030                                 NUM_BANKS(ADDR_SURF_16_BANK));
3031
3032                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3033                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3034                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3035                                 NUM_BANKS(ADDR_SURF_16_BANK));
3036
3037                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3038                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3039                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3040                                 NUM_BANKS(ADDR_SURF_16_BANK));
3041
3042                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3043                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3044                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3045                                 NUM_BANKS(ADDR_SURF_16_BANK));
3046
3047                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3048                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3049                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3050                                 NUM_BANKS(ADDR_SURF_16_BANK));
3051
3052                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3053                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3054                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3055                                 NUM_BANKS(ADDR_SURF_16_BANK));
3056
3057                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3058                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3059                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3060                                 NUM_BANKS(ADDR_SURF_16_BANK));
3061
3062                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3063                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3064                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3065                                 NUM_BANKS(ADDR_SURF_16_BANK));
3066
3067                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3068                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3069                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3070                                 NUM_BANKS(ADDR_SURF_16_BANK));
3071
3072                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3073                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3074                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3075                                 NUM_BANKS(ADDR_SURF_16_BANK));
3076
3077                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3078                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3079                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3080                                 NUM_BANKS(ADDR_SURF_16_BANK));
3081
3082                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3083                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3084                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3085                                 NUM_BANKS(ADDR_SURF_8_BANK));
3086
3087                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3089                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3090                                 NUM_BANKS(ADDR_SURF_4_BANK));
3091
3092                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3093                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3094                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3095                                 NUM_BANKS(ADDR_SURF_4_BANK));
3096
3097                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3098                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3099
3100                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3101                         if (reg_offset != 7)
3102                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3103
3104                 break;
3105         case CHIP_STONEY:
3106                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3107                                 PIPE_CONFIG(ADDR_SURF_P2) |
3108                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3109                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3110                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3111                                 PIPE_CONFIG(ADDR_SURF_P2) |
3112                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3113                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3114                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3115                                 PIPE_CONFIG(ADDR_SURF_P2) |
3116                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3117                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3118                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3119                                 PIPE_CONFIG(ADDR_SURF_P2) |
3120                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3121                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3122                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3123                                 PIPE_CONFIG(ADDR_SURF_P2) |
3124                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3125                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3126                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3127                                 PIPE_CONFIG(ADDR_SURF_P2) |
3128                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3129                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3130                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3131                                 PIPE_CONFIG(ADDR_SURF_P2) |
3132                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3133                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3134                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3135                                 PIPE_CONFIG(ADDR_SURF_P2));
3136                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3137                                 PIPE_CONFIG(ADDR_SURF_P2) |
3138                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3139                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3140                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3141                                  PIPE_CONFIG(ADDR_SURF_P2) |
3142                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3143                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3144                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3145                                  PIPE_CONFIG(ADDR_SURF_P2) |
3146                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3147                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3148                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3149                                  PIPE_CONFIG(ADDR_SURF_P2) |
3150                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3151                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3152                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3153                                  PIPE_CONFIG(ADDR_SURF_P2) |
3154                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3155                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3156                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3157                                  PIPE_CONFIG(ADDR_SURF_P2) |
3158                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3159                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3160                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3161                                  PIPE_CONFIG(ADDR_SURF_P2) |
3162                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3163                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3164                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3165                                  PIPE_CONFIG(ADDR_SURF_P2) |
3166                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3167                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3168                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3169                                  PIPE_CONFIG(ADDR_SURF_P2) |
3170                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3171                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3172                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3173                                  PIPE_CONFIG(ADDR_SURF_P2) |
3174                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3175                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3176                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3177                                  PIPE_CONFIG(ADDR_SURF_P2) |
3178                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3179                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3180                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3181                                  PIPE_CONFIG(ADDR_SURF_P2) |
3182                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3183                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3184                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3185                                  PIPE_CONFIG(ADDR_SURF_P2) |
3186                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3187                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3188                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3189                                  PIPE_CONFIG(ADDR_SURF_P2) |
3190                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3191                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3192                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3193                                  PIPE_CONFIG(ADDR_SURF_P2) |
3194                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3195                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3196                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3197                                  PIPE_CONFIG(ADDR_SURF_P2) |
3198                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3199                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3200                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3201                                  PIPE_CONFIG(ADDR_SURF_P2) |
3202                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3203                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3204                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3205                                  PIPE_CONFIG(ADDR_SURF_P2) |
3206                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3207                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3208
3209                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3210                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3211                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3212                                 NUM_BANKS(ADDR_SURF_8_BANK));
3213                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3214                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3215                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3216                                 NUM_BANKS(ADDR_SURF_8_BANK));
3217                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3218                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3219                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3220                                 NUM_BANKS(ADDR_SURF_8_BANK));
3221                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3222                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3223                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3224                                 NUM_BANKS(ADDR_SURF_8_BANK));
3225                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3226                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3227                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3228                                 NUM_BANKS(ADDR_SURF_8_BANK));
3229                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3230                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3231                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3232                                 NUM_BANKS(ADDR_SURF_8_BANK));
3233                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3234                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3235                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3236                                 NUM_BANKS(ADDR_SURF_8_BANK));
3237                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3238                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3239                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3240                                 NUM_BANKS(ADDR_SURF_16_BANK));
3241                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3242                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3243                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3244                                 NUM_BANKS(ADDR_SURF_16_BANK));
3245                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3246                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3247                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3248                                  NUM_BANKS(ADDR_SURF_16_BANK));
3249                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3250                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3251                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3252                                  NUM_BANKS(ADDR_SURF_16_BANK));
3253                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3254                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3255                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3256                                  NUM_BANKS(ADDR_SURF_16_BANK));
3257                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3258                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3259                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3260                                  NUM_BANKS(ADDR_SURF_16_BANK));
3261                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3262                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3263                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3264                                  NUM_BANKS(ADDR_SURF_8_BANK));
3265
3266                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3267                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3268                             reg_offset != 23)
3269                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3270
3271                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3272                         if (reg_offset != 7)
3273                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3274
3275                 break;
3276         default:
3277                 dev_warn(adev->dev,
3278                          "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3279                          adev->asic_type);
3280                 /* fall through */
3281         case CHIP_CARRIZO:
3282                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3283                                 PIPE_CONFIG(ADDR_SURF_P2) |
3284                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3285                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3286                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3287                                 PIPE_CONFIG(ADDR_SURF_P2) |
3288                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3289                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3290                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3291                                 PIPE_CONFIG(ADDR_SURF_P2) |
3292                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3293                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3294                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3295                                 PIPE_CONFIG(ADDR_SURF_P2) |
3296                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3297                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3298                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3299                                 PIPE_CONFIG(ADDR_SURF_P2) |
3300                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3301                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3302                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3303                                 PIPE_CONFIG(ADDR_SURF_P2) |
3304                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3305                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3306                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3307                                 PIPE_CONFIG(ADDR_SURF_P2) |
3308                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3309                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3310                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3311                                 PIPE_CONFIG(ADDR_SURF_P2));
3312                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3313                                 PIPE_CONFIG(ADDR_SURF_P2) |
3314                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3315                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3316                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3317                                  PIPE_CONFIG(ADDR_SURF_P2) |
3318                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3319                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3320                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3321                                  PIPE_CONFIG(ADDR_SURF_P2) |
3322                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3323                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3324                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3325                                  PIPE_CONFIG(ADDR_SURF_P2) |
3326                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3327                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3328                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3329                                  PIPE_CONFIG(ADDR_SURF_P2) |
3330                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3331                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3332                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3333                                  PIPE_CONFIG(ADDR_SURF_P2) |
3334                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3335                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3336                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3337                                  PIPE_CONFIG(ADDR_SURF_P2) |
3338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3340                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3341                                  PIPE_CONFIG(ADDR_SURF_P2) |
3342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3344                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3345                                  PIPE_CONFIG(ADDR_SURF_P2) |
3346                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3347                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3348                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3349                                  PIPE_CONFIG(ADDR_SURF_P2) |
3350                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3351                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3352                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3353                                  PIPE_CONFIG(ADDR_SURF_P2) |
3354                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3355                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3356                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3357                                  PIPE_CONFIG(ADDR_SURF_P2) |
3358                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3359                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3360                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3361                                  PIPE_CONFIG(ADDR_SURF_P2) |
3362                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3363                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3364                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3365                                  PIPE_CONFIG(ADDR_SURF_P2) |
3366                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3367                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3368                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3369                                  PIPE_CONFIG(ADDR_SURF_P2) |
3370                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3371                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3372                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3373                                  PIPE_CONFIG(ADDR_SURF_P2) |
3374                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3375                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3376                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3377                                  PIPE_CONFIG(ADDR_SURF_P2) |
3378                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3379                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3380                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3381                                  PIPE_CONFIG(ADDR_SURF_P2) |
3382                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3383                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3384
3385                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3386                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3387                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3388                                 NUM_BANKS(ADDR_SURF_8_BANK));
3389                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3390                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3391                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3392                                 NUM_BANKS(ADDR_SURF_8_BANK));
3393                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3394                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3395                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3396                                 NUM_BANKS(ADDR_SURF_8_BANK));
3397                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3398                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3399                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3400                                 NUM_BANKS(ADDR_SURF_8_BANK));
3401                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3402                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3403                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3404                                 NUM_BANKS(ADDR_SURF_8_BANK));
3405                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3406                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3407                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3408                                 NUM_BANKS(ADDR_SURF_8_BANK));
3409                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3410                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3411                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3412                                 NUM_BANKS(ADDR_SURF_8_BANK));
3413                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3414                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3415                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3416                                 NUM_BANKS(ADDR_SURF_16_BANK));
3417                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3418                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3419                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3420                                 NUM_BANKS(ADDR_SURF_16_BANK));
3421                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3422                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3423                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3424                                  NUM_BANKS(ADDR_SURF_16_BANK));
3425                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3426                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3427                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3428                                  NUM_BANKS(ADDR_SURF_16_BANK));
3429                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3430                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3431                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3432                                  NUM_BANKS(ADDR_SURF_16_BANK));
3433                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3434                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3435                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3436                                  NUM_BANKS(ADDR_SURF_16_BANK));
3437                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3438                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3439                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3440                                  NUM_BANKS(ADDR_SURF_8_BANK));
3441
3442                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3443                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3444                             reg_offset != 23)
3445                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3446
3447                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3448                         if (reg_offset != 7)
3449                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3450
3451                 break;
3452         }
3453 }
3454
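     /*
      * Program GRBM_GFX_INDEX so that subsequent register accesses go to a
      * specific shader engine (se_num), shader array (sh_num) and instance,
      * or are broadcast to all of them when an argument is 0xffffffff.
      */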
3455 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3456                                   u32 se_num, u32 sh_num, u32 instance)
3457 {
3458         u32 data;
3459
3460         if (instance == 0xffffffff)
3461                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3462         else
3463                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3464
3465         if (se_num == 0xffffffff)
3466                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3467         else
3468                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3469
3470         if (sh_num == 0xffffffff)
3471                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3472         else
3473                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3474
3475         WREG32(mmGRBM_GFX_INDEX, data);
3476 }
3477
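     /*
      * Return a bitmap of the render backends that are still enabled for the
      * currently selected SE/SH, i.e. not turned off in either
      * CC_RB_BACKEND_DISABLE (harvesting) or GC_USER_RB_BACKEND_DISABLE.
      */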
3478 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3479 {
3480         u32 data, mask;
3481
3482         data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3483                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3484
3485         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3486
3487         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3488                                          adev->gfx.config.max_sh_per_se);
3489
3490         return (~data) & mask;
3491 }
3492
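     /*
      * Default PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG_1 values per ASIC,
      * assuming a fully populated (non-harvested) RB configuration.
      */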
3493 static void
3494 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3495 {
3496         switch (adev->asic_type) {
3497         case CHIP_FIJI:
3498                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3499                           RB_XSEL2(1) | PKR_MAP(2) |
3500                           PKR_XSEL(1) | PKR_YSEL(1) |
3501                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3502                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3503                            SE_PAIR_YSEL(2);
3504                 break;
3505         case CHIP_TONGA:
3506         case CHIP_POLARIS10:
3507                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3508                           SE_XSEL(1) | SE_YSEL(1);
3509                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3510                            SE_PAIR_YSEL(2);
3511                 break;
3512         case CHIP_TOPAZ:
3513         case CHIP_CARRIZO:
3514                 *rconf |= RB_MAP_PKR0(2);
3515                 *rconf1 |= 0x0;
3516                 break;
3517         case CHIP_POLARIS11:
3518         case CHIP_POLARIS12:
3519                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3520                           SE_XSEL(1) | SE_YSEL(1);
3521                 *rconf1 |= 0x0;
3522                 break;
3523         case CHIP_STONEY:
3524                 *rconf |= 0x0;
3525                 *rconf1 |= 0x0;
3526                 break;
3527         default:
3528                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3529                 break;
3530         }
3531 }
3532
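     /*
      * Adjust the raster configuration for parts with harvested RBs: for
      * each shader engine, remap the SE/PKR/RB routing fields away from
      * backends that are missing from rb_mask.
      */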
3533 static void
3534 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3535                                         u32 raster_config, u32 raster_config_1,
3536                                         unsigned rb_mask, unsigned num_rb)
3537 {
3538         unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3539         unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3540         unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3541         unsigned rb_per_se = num_rb / num_se;
3542         unsigned se_mask[4];
3543         unsigned se;
3544
3545         se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3546         se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3547         se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3548         se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3549
3550         WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3551         WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3552         WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3553
3554         if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3555                              (!se_mask[2] && !se_mask[3]))) {
3556                 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3557
3558                 if (!se_mask[0] && !se_mask[1]) {
3559                         raster_config_1 |=
3560                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3561                 } else {
3562                         raster_config_1 |=
3563                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3564                 }
3565         }
3566
3567         for (se = 0; se < num_se; se++) {
3568                 unsigned raster_config_se = raster_config;
3569                 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3570                 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3571                 int idx = (se / 2) * 2;
3572
3573                 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3574                         raster_config_se &= ~SE_MAP_MASK;
3575
3576                         if (!se_mask[idx]) {
3577                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3578                         } else {
3579                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3580                         }
3581                 }
3582
3583                 pkr0_mask &= rb_mask;
3584                 pkr1_mask &= rb_mask;
3585                 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3586                         raster_config_se &= ~PKR_MAP_MASK;
3587
3588                         if (!pkr0_mask) {
3589                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3590                         } else {
3591                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3592                         }
3593                 }
3594
3595                 if (rb_per_se >= 2) {
3596                         unsigned rb0_mask = 1 << (se * rb_per_se);
3597                         unsigned rb1_mask = rb0_mask << 1;
3598
3599                         rb0_mask &= rb_mask;
3600                         rb1_mask &= rb_mask;
3601                         if (!rb0_mask || !rb1_mask) {
3602                                 raster_config_se &= ~RB_MAP_PKR0_MASK;
3603
3604                                 if (!rb0_mask) {
3605                                         raster_config_se |=
3606                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3607                                 } else {
3608                                         raster_config_se |=
3609                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3610                                 }
3611                         }
3612
3613                         if (rb_per_se > 2) {
3614                                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3615                                 rb1_mask = rb0_mask << 1;
3616                                 rb0_mask &= rb_mask;
3617                                 rb1_mask &= rb_mask;
3618                                 if (!rb0_mask || !rb1_mask) {
3619                                         raster_config_se &= ~RB_MAP_PKR1_MASK;
3620
3621                                         if (!rb0_mask) {
3622                                                 raster_config_se |=
3623                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3624                                         } else {
3625                                                 raster_config_se |=
3626                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3627                                         }
3628                                 }
3629                         }
3630                 }
3631
3632                 /* GRBM_GFX_INDEX has a different offset on VI */
3633                 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3634                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3635                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3636         }
3637
3638         /* GRBM_GFX_INDEX has a different offset on VI */
3639         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3640 }
3641
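     /*
      * Read back which RBs are active on every SE/SH, program the raster
      * configuration (via the harvested path when some RBs are disabled)
      * and cache the per-SE/SH values for later queries from userspace.
      */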
3642 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3643 {
3644         int i, j;
3645         u32 data;
3646         u32 raster_config = 0, raster_config_1 = 0;
3647         u32 active_rbs = 0;
3648         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3649                                         adev->gfx.config.max_sh_per_se;
3650         unsigned num_rb_pipes;
3651
3652         mutex_lock(&adev->grbm_idx_mutex);
3653         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3654                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3655                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3656                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3657                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3658                                                rb_bitmap_width_per_sh);
3659                 }
3660         }
3661         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3662
3663         adev->gfx.config.backend_enable_mask = active_rbs;
3664         adev->gfx.config.num_rbs = hweight32(active_rbs);
3665
3666         num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3667                              adev->gfx.config.max_shader_engines, 16);
3668
3669         gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3670
3671         if (!adev->gfx.config.backend_enable_mask ||
3672                         adev->gfx.config.num_rbs >= num_rb_pipes) {
3673                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3674                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3675         } else {
3676                 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3677                                                         adev->gfx.config.backend_enable_mask,
3678                                                         num_rb_pipes);
3679         }
3680
3681         /* cache the values for userspace */
3682         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3683                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3684                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3685                         adev->gfx.config.rb_config[i][j].rb_backend_disable =
3686                                 RREG32(mmCC_RB_BACKEND_DISABLE);
3687                         adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3688                                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3689                         adev->gfx.config.rb_config[i][j].raster_config =
3690                                 RREG32(mmPA_SC_RASTER_CONFIG);
3691                         adev->gfx.config.rb_config[i][j].raster_config_1 =
3692                                 RREG32(mmPA_SC_RASTER_CONFIG_1);
3693                 }
3694         }
3695         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3696         mutex_unlock(&adev->grbm_idx_mutex);
3697 }
3698
3699 /**
3700  * gfx_v8_0_init_compute_vmid - init compute vmid
3701  *
3702  * @adev: amdgpu_device pointer
3703  *
3704  * Initialize compute vmid sh_mem registers
3705  *
3706  */
3707 #define DEFAULT_SH_MEM_BASES    (0x6000)
3708 #define FIRST_COMPUTE_VMID      (8)
3709 #define LAST_COMPUTE_VMID       (16)
3710 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3711 {
3712         int i;
3713         uint32_t sh_mem_config;
3714         uint32_t sh_mem_bases;
3715
3716         /*
3717          * Configure apertures:
3718          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3719          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3720          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3721          */
3722         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3723
3724         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3725                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3726                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3727                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3728                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3729                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3730
3731         mutex_lock(&adev->srbm_mutex);
3732         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3733                 vi_srbm_select(adev, 0, 0, 0, i);
3734                 /* CP and shaders */
3735                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3736                 WREG32(mmSH_MEM_APE1_BASE, 1);
3737                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3738                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3739         }
3740         vi_srbm_select(adev, 0, 0, 0, 0);
3741         mutex_unlock(&adev->srbm_mutex);
3742 }
3743
3744 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3745 {
3746         switch (adev->asic_type) {
3747         default:
3748                 adev->gfx.config.double_offchip_lds_buf = 1;
3749                 break;
3750         case CHIP_CARRIZO:
3751         case CHIP_STONEY:
3752                 adev->gfx.config.double_offchip_lds_buf = 0;
3753                 break;
3754         }
3755 }
3756
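     /*
      * One-time init of the GFX block: tiling tables, RB setup, CU info,
      * SH_MEM aperture setup for every VMID and the PA_SC FIFO sizes.
      */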
3757 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3758 {
3759         u32 tmp, sh_static_mem_cfg;
3760         int i;
3761
3762         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3763         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3764         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3765         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3766
3767         gfx_v8_0_tiling_mode_table_init(adev);
3768         gfx_v8_0_setup_rb(adev);
3769         gfx_v8_0_get_cu_info(adev);
3770         gfx_v8_0_config_init(adev);
3771
3772         /* XXX SH_MEM regs */
3773         /* where to put LDS, scratch, GPUVM in FSA64 space */
3774         sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3775                                    SWIZZLE_ENABLE, 1);
3776         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3777                                    ELEMENT_SIZE, 1);
3778         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3779                                    INDEX_STRIDE, 3);
3780         WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3781
3782         mutex_lock(&adev->srbm_mutex);
3783         for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3784                 vi_srbm_select(adev, 0, 0, 0, i);
3785                 /* CP and shaders */
3786                 if (i == 0) {
3787                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3788                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3789                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3790                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3791                         WREG32(mmSH_MEM_CONFIG, tmp);
3792                         WREG32(mmSH_MEM_BASES, 0);
3793                 } else {
3794                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3795                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3796                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3797                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3798                         WREG32(mmSH_MEM_CONFIG, tmp);
3799                         tmp = adev->gmc.shared_aperture_start >> 48;
3800                         WREG32(mmSH_MEM_BASES, tmp);
3801                 }
3802
3803                 WREG32(mmSH_MEM_APE1_BASE, 1);
3804                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3805         }
3806         vi_srbm_select(adev, 0, 0, 0, 0);
3807         mutex_unlock(&adev->srbm_mutex);
3808
3809         gfx_v8_0_init_compute_vmid(adev);
3810
3811         mutex_lock(&adev->grbm_idx_mutex);
3812         /*
3813          * make sure that the following register writes are broadcast
3814          * to all the shaders
3815          */
3816         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3817
3818         WREG32(mmPA_SC_FIFO_SIZE,
3819                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3820                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3821                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3822                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3823                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3824                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3825                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3826                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3827
3828         tmp = RREG32(mmSPI_ARB_PRIORITY);
3829         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3830         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3831         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3832         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3833         WREG32(mmSPI_ARB_PRIORITY, tmp);
3834
3835         mutex_unlock(&adev->grbm_idx_mutex);
3836
3837 }
3838
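     /*
      * Wait for the RLC serdes to become idle: first the per-CU masters on
      * every SE/SH, then the non-CU masters, each bounded by
      * adev->usec_timeout.
      */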
3839 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3840 {
3841         u32 i, j, k;
3842         u32 mask;
3843
3844         mutex_lock(&adev->grbm_idx_mutex);
3845         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3846                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3847                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3848                         for (k = 0; k < adev->usec_timeout; k++) {
3849                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3850                                         break;
3851                                 udelay(1);
3852                         }
3853                         if (k == adev->usec_timeout) {
3854                                 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3855                                                       0xffffffff, 0xffffffff);
3856                                 mutex_unlock(&adev->grbm_idx_mutex);
3857                                 DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
3858                                          i, j);
3859                                 return;
3860                         }
3861                 }
3862         }
3863         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3864         mutex_unlock(&adev->grbm_idx_mutex);
3865
3866         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3867                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3868                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3869                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3870         for (k = 0; k < adev->usec_timeout; k++) {
3871                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3872                         break;
3873                 udelay(1);
3874         }
3875 }
3876
3877 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3878                                                bool enable)
3879 {
3880         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3881
3882         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3883         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3884         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3885         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3886
3887         WREG32(mmCP_INT_CNTL_RING0, tmp);
3888 }
3889
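     /* Point the RLC at the clear state indirect buffer in GPU memory. */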
3890 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3891 {
3892         /* csib */
3893         WREG32(mmRLC_CSIB_ADDR_HI,
3894                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3895         WREG32(mmRLC_CSIB_ADDR_LO,
3896                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3897         WREG32(mmRLC_CSIB_LENGTH,
3898                         adev->gfx.rlc.clear_state_size);
3899 }
3900
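     /*
      * Walk the RLC indirect register list starting at ind_offset: remember
      * where each entry begins in ind_start_offsets[], gather the distinct
      * register indices into unique_indices[] and rewrite the list in place
      * to reference those unique slots.
      */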
3901 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3902                                 int ind_offset,
3903                                 int list_size,
3904                                 int *unique_indices,
3905                                 int *indices_count,
3906                                 int max_indices,
3907                                 int *ind_start_offsets,
3908                                 int *offset_count,
3909                                 int max_offset)
3910 {
3911         int indices;
3912         bool new_entry = true;
3913
3914         for (; ind_offset < list_size; ind_offset++) {
3915
3916                 if (new_entry) {
3917                         new_entry = false;
3918                         ind_start_offsets[*offset_count] = ind_offset;
3919                         *offset_count = *offset_count + 1;
3920                         BUG_ON(*offset_count >= max_offset);
3921                 }
3922
3923                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3924                         new_entry = true;
3925                         continue;
3926                 }
3927
3928                 ind_offset += 2;
3929
3930                 /* look for a matching index */
3931                 for (indices = 0;
3932                         indices < *indices_count;
3933                         indices++) {
3934                         if (unique_indices[indices] ==
3935                                 register_list_format[ind_offset])
3936                                 break;
3937                 }
3938
3939                 if (indices >= *indices_count) {
3940                         unique_indices[*indices_count] =
3941                                 register_list_format[ind_offset];
3942                         indices = *indices_count;
3943                         *indices_count = *indices_count + 1;
3944                         BUG_ON(*indices_count >= max_indices);
3945                 }
3946
3947                 register_list_format[ind_offset] = indices;
3948         }
3949 }
3950
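     /*
      * Upload the RLC save/restore machinery: the direct register list goes
      * to SRM ARAM, the indirect list and its starting offsets go to GPM
      * scratch, and the unique register indices end up in the
      * RLC_SRM_INDEX_CNTL_ADDR/DATA register pairs.
      */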
3951 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3952 {
3953         int i, temp, data;
3954         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3955         int indices_count = 0;
3956         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3957         int offset_count = 0;
3958
3959         int list_size;
3960         unsigned int *register_list_format =
3961                 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3962         if (!register_list_format)
3963                 return -ENOMEM;
3964         memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3965                         adev->gfx.rlc.reg_list_format_size_bytes);
3966
3967         gfx_v8_0_parse_ind_reg_list(register_list_format,
3968                                 RLC_FormatDirectRegListLength,
3969                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3970                                 unique_indices,
3971                                 &indices_count,
3972                                 ARRAY_SIZE(unique_indices),
3973                                 indirect_start_offsets,
3974                                 &offset_count,
3975                                 ARRAY_SIZE(indirect_start_offsets));
3976
3977         /* save and restore list */
3978         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3979
3980         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3981         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3982                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3983
3984         /* indirect list */
3985         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3986         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3987                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3988
3989         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3990         list_size = list_size >> 1;
3991         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3992         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3993
3994         /* starting offsets of the indirect list entries */
3995         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3996                 adev->gfx.rlc.starting_offsets_start);
3997         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3998                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3999                                 indirect_start_offsets[i]);
4000
4001         /* unique indices */
4002         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
4003         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
4004         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4005                 if (unique_indices[i] != 0) {
4006                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4007                         WREG32(data + i, unique_indices[i] >> 20);
4008                 }
4009         }
4010         kfree(register_list_format);
4011
4012         return 0;
4013 }
4014
4015 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4016 {
4017         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4018 }
4019
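     /* Program the RLC power-gating delays and the GFX idle threshold. */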
4020 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4021 {
4022         uint32_t data;
4023
4024         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4025
4026         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4027         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4028         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4029         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4030         WREG32(mmRLC_PG_DELAY, data);
4031
4032         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4033         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4034
4035 }
4036
4037 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4038                                                 bool enable)
4039 {
4040         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4041 }
4042
4043 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4044                                                   bool enable)
4045 {
4046         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4047 }
4048
4049 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4050 {
4051         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4052 }
4053
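     /*
      * Per-ASIC power-gating init. Only the APUs (Carrizo/Stoney) also need
      * the CP jump table and the always-on CU mask on top of the common
      * save/restore list setup.
      */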
4054 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4055 {
4056         if ((adev->asic_type == CHIP_CARRIZO) ||
4057             (adev->asic_type == CHIP_STONEY)) {
4058                 gfx_v8_0_init_csb(adev);
4059                 gfx_v8_0_init_save_restore_list(adev);
4060                 gfx_v8_0_enable_save_restore_machine(adev);
4061                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4062                 gfx_v8_0_init_power_gating(adev);
4063                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4064         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4065                    (adev->asic_type == CHIP_POLARIS12)) {
4066                 gfx_v8_0_init_csb(adev);
4067                 gfx_v8_0_init_save_restore_list(adev);
4068                 gfx_v8_0_enable_save_restore_machine(adev);
4069                 gfx_v8_0_init_power_gating(adev);
4070         }
4072 }
4073
4074 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4075 {
4076         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4077
4078         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4079         gfx_v8_0_wait_for_rlc_serdes(adev);
4080 }
4081
4082 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4083 {
4084         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4085         udelay(50);
4086
4087         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4088         udelay(50);
4089 }
4090
4091 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4092 {
4093         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4094
4095         /* on APUs (e.g. carrizo) the cp interrupt is enabled after the cp is initialized */
4096         if (!(adev->flags & AMD_IS_APU))
4097                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4098
4099         udelay(50);
4100 }
4101
4102 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4103 {
4104         const struct rlc_firmware_header_v2_0 *hdr;
4105         const __le32 *fw_data;
4106         unsigned i, fw_size;
4107
4108         if (!adev->gfx.rlc_fw)
4109                 return -EINVAL;
4110
4111         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4112         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4113
4114         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4115                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4116         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4117
4118         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4119         for (i = 0; i < fw_size; i++)
4120                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4121         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4122
4123         return 0;
4124 }
4125
4126 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4127 {
4128         int r;
4129         u32 tmp;
4130
4131         gfx_v8_0_rlc_stop(adev);
4132
4133         /* disable CG */
4134         tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
4135         tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
4136                  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4137         WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
4138         if (adev->asic_type == CHIP_POLARIS11 ||
4139             adev->asic_type == CHIP_POLARIS10 ||
4140             adev->asic_type == CHIP_POLARIS12) {
4141                 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
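                /* clearing the low two bits presumably drops the 3D-domain
                 * CGCG/CGLS enables, mirroring the masks cleared above */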
4142                 tmp &= ~0x3;
4143                 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
4144         }
4145
4146         /* disable PG */
4147         WREG32(mmRLC_PG_CNTL, 0);
4148
4149         gfx_v8_0_rlc_reset(adev);
4150         gfx_v8_0_init_pg(adev);
4151
4153         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4154                 /* legacy rlc firmware loading */
4155                 r = gfx_v8_0_rlc_load_microcode(adev);
4156                 if (r)
4157                         return r;
4158         }
4159
4160         gfx_v8_0_rlc_start(adev);
4161
4162         return 0;
4163 }
4164
4165 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4166 {
4167         int i;
4168         u32 tmp = RREG32(mmCP_ME_CNTL);
4169
4170         if (enable) {
4171                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4172                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4173                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4174         } else {
4175                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4176                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4177                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4178                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4179                         adev->gfx.gfx_ring[i].ready = false;
4180         }
4181         WREG32(mmCP_ME_CNTL, tmp);
4182         udelay(50);
4183 }
4184
4185 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4186 {
4187         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4188         const struct gfx_firmware_header_v1_0 *ce_hdr;
4189         const struct gfx_firmware_header_v1_0 *me_hdr;
4190         const __le32 *fw_data;
4191         unsigned i, fw_size;
4192
4193         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4194                 return -EINVAL;
4195
4196         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4197                 adev->gfx.pfp_fw->data;
4198         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4199                 adev->gfx.ce_fw->data;
4200         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4201                 adev->gfx.me_fw->data;
4202
4203         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4204         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4205         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4206
4207         gfx_v8_0_cp_gfx_enable(adev, false);
4208
4209         /* PFP */
4210         fw_data = (const __le32 *)
4211                 (adev->gfx.pfp_fw->data +
4212                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4213         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4214         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4215         for (i = 0; i < fw_size; i++)
4216                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4217         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4218
4219         /* CE */
4220         fw_data = (const __le32 *)
4221                 (adev->gfx.ce_fw->data +
4222                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4223         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4224         WREG32(mmCP_CE_UCODE_ADDR, 0);
4225         for (i = 0; i < fw_size; i++)
4226                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4227         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4228
4229         /* ME */
4230         fw_data = (const __le32 *)
4231                 (adev->gfx.me_fw->data +
4232                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4233         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4234         WREG32(mmCP_ME_RAM_WADDR, 0);
4235         for (i = 0; i < fw_size; i++)
4236                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4237         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4238
4239         return 0;
4240 }
4241
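/* Dword accounting for the clear-state buffer emitted by
 * gfx_v8_0_cp_gfx_start(): PREAMBLE begin (2) + CONTEXT_CONTROL (3), then per
 * SECT_CONTEXT extent a SET_CONTEXT_REG header and offset (2) plus reg_count
 * payload dwords, then PA_SC_RASTER_CONFIG/_1 (4), PREAMBLE end (2) and
 * CLEAR_STATE (2).
 */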
4242 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4243 {
4244         u32 count = 0;
4245         const struct cs_section_def *sect = NULL;
4246         const struct cs_extent_def *ext = NULL;
4247
4248         /* begin clear state */
4249         count += 2;
4250         /* context control state */
4251         count += 3;
4252
4253         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4254                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4255                         if (sect->id == SECT_CONTEXT)
4256                                 count += 2 + ext->reg_count;
4257                         else
4258                                 return 0;
4259                 }
4260         }
4261         /* pa_sc_raster_config/pa_sc_raster_config1 */
4262         count += 4;
4263         /* end clear state */
4264         count += 2;
4265         /* clear state */
4266         count += 2;
4267
4268         return count;
4269 }
4270
4271 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4272 {
4273         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4274         const struct cs_section_def *sect = NULL;
4275         const struct cs_extent_def *ext = NULL;
4276         int r, i;
4277
4278         /* init the CP */
4279         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4280         WREG32(mmCP_ENDIAN_SWAP, 0);
4281         WREG32(mmCP_DEVICE_ID, 1);
4282
4283         gfx_v8_0_cp_gfx_enable(adev, true);
4284
4285         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4286         if (r) {
4287                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4288                 return r;
4289         }
4290
4291         /* clear state buffer */
4292         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4293         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4294
4295         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4296         amdgpu_ring_write(ring, 0x80000000);
4297         amdgpu_ring_write(ring, 0x80000000);
4298
4299         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4300                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4301                         if (sect->id == SECT_CONTEXT) {
4302                                 amdgpu_ring_write(ring,
4303                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4304                                                ext->reg_count));
4305                                 amdgpu_ring_write(ring,
4306                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4307                                 for (i = 0; i < ext->reg_count; i++)
4308                                         amdgpu_ring_write(ring, ext->extent[i]);
4309                         }
4310                 }
4311         }
4312
4313         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4314         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4315         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4316         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4317
4318         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4319         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4320
4321         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4322         amdgpu_ring_write(ring, 0);
4323
4324         /* init the CE partitions */
4325         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4326         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4327         amdgpu_ring_write(ring, 0x8000);
4328         amdgpu_ring_write(ring, 0x8000);
4329
4330         amdgpu_ring_commit(ring);
4331
4332         return 0;
4333 }

4334 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4335 {
4336         u32 tmp;
4337         /* no gfx doorbells on iceland */
4338         if (adev->asic_type == CHIP_TOPAZ)
4339                 return;
4340
4341         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4342
4343         if (ring->use_doorbell) {
4344                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4345                                 DOORBELL_OFFSET, ring->doorbell_index);
4346                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4347                                                 DOORBELL_HIT, 0);
4348                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4349                                             DOORBELL_EN, 1);
4350         } else {
4351                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4352         }
4353
4354         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4355
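        /* the doorbell aperture below is only programmed on dGPUs; on APUs
         * it is presumably set up elsewhere, hence the early return */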
4356         if (adev->flags & AMD_IS_APU)
4357                 return;
4358
4359         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4360                                         DOORBELL_RANGE_LOWER,
4361                                         AMDGPU_DOORBELL_GFX_RING0);
4362         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4363
4364         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4365                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4366 }
4367
4368 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4369 {
4370         struct amdgpu_ring *ring;
4371         u32 tmp;
4372         u32 rb_bufsz;
4373         u64 rb_addr, rptr_addr, wptr_gpu_addr;
4374         int r;
4375
4376         /* Set the write pointer delay */
4377         WREG32(mmCP_RB_WPTR_DELAY, 0);
4378
4379         /* set the RB to use vmid 0 */
4380         WREG32(mmCP_RB_VMID, 0);
4381
4382         /* Set ring buffer size */
4383         ring = &adev->gfx.gfx_ring[0];
4384         rb_bufsz = order_base_2(ring->ring_size / 8);
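        /* ring_size is in bytes; RB_BUFSZ takes log2 of the size in 8-byte
         * units, hence the divide by 8 */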
4385         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4386         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4387         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4388         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4389 #ifdef __BIG_ENDIAN
4390         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4391 #endif
4392         WREG32(mmCP_RB0_CNTL, tmp);
4393
4394         /* Initialize the ring buffer's read and write pointers */
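        /* RB_RPTR_WR_ENA is raised temporarily so the read pointer can be
         * reset; rewriting tmp below drops it again */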
4395         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4396         ring->wptr = 0;
4397         WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4398
4399         /* set the wb address whether it's enabled or not */
4400         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4401         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4402         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4403
4404         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4405         WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4406         WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4407         mdelay(1);
4408         WREG32(mmCP_RB0_CNTL, tmp);
4409
4410         rb_addr = ring->gpu_addr >> 8;
4411         WREG32(mmCP_RB0_BASE, rb_addr);
4412         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4413
4414         gfx_v8_0_set_cpg_door_bell(adev, ring);
4415         /* start the ring */
4416         amdgpu_ring_clear_ring(ring);
4417         gfx_v8_0_cp_gfx_start(adev);
4418         ring->ready = true;
4419         r = amdgpu_ring_test_ring(ring);
4420         if (r)
4421                 ring->ready = false;
4422
4423         return r;
4424 }
4425
4426 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4427 {
4428         int i;
4429
4430         if (enable) {
4431                 WREG32(mmCP_MEC_CNTL, 0);
4432         } else {
4433                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4434                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4435                         adev->gfx.compute_ring[i].ready = false;
4436                 adev->gfx.kiq.ring.ready = false;
4437         }
4438         udelay(50);
4439 }
4440
4441 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4442 {
4443         const struct gfx_firmware_header_v1_0 *mec_hdr;
4444         const __le32 *fw_data;
4445         unsigned i, fw_size;
4446
4447         if (!adev->gfx.mec_fw)
4448                 return -EINVAL;
4449
4450         gfx_v8_0_cp_compute_enable(adev, false);
4451
4452         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4453         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4454
4455         fw_data = (const __le32 *)
4456                 (adev->gfx.mec_fw->data +
4457                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4458         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4459
4460         /* MEC1 */
4461         WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4462         for (i = 0; i < fw_size; i++)
4463                 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4464         WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4465
4466         /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4467         if (adev->gfx.mec2_fw) {
4468                 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4469
4470                 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4471                 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4472
4473                 fw_data = (const __le32 *)
4474                         (adev->gfx.mec2_fw->data +
4475                          le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4476                 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4477
4478                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4479                 for (i = 0; i < fw_size; i++)
4480                         WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4481                 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4482         }
4483
4484         return 0;
4485 }
4486
4487 /* KIQ functions */
4488 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4489 {
4490         uint32_t tmp;
4491         struct amdgpu_device *adev = ring->adev;
4492
4493         /* tell RLC which queue is the KIQ */
4494         tmp = RREG32(mmRLC_CP_SCHEDULERS);
4495         tmp &= 0xffffff00;
4496         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4497         WREG32(mmRLC_CP_SCHEDULERS, tmp);
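        /* set what is presumably a valid/activate bit (bit 7) in a second
         * write, after the me/pipe/queue id has been latched */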
4498         tmp |= 0x80;
4499         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4500 }
4501
4502 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4503 {
4504         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4505         uint32_t scratch, tmp = 0;
4506         uint64_t queue_mask = 0;
4507         int r, i;
4508
4509         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4510                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4511                         continue;
4512
4513                 /* This situation may be hit in the future if a new HW
4514                  * generation exposes more than 64 queues. If so, the
4515                  * definition of queue_mask needs updating. */
4516                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4517                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4518                         break;
4519                 }
4520
4521                 queue_mask |= (1ull << i);
4522         }
4523
4524         r = amdgpu_gfx_scratch_get(adev, &scratch);
4525         if (r) {
4526                 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
4527                 return r;
4528         }
4529         WREG32(scratch, 0xCAFEDEAD);
4530
4531         r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
4532         if (r) {
4533                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4534                 amdgpu_gfx_scratch_free(adev, scratch);
4535                 return r;
4536         }
4537         /* set resources */
4538         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4539         amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4540         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4541         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4542         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4543         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4544         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4545         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4546         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4547                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4548                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4549                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4550
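                /* ring->me is 1-based (ME1 is the first compute engine)
                 * while the MAP_QUEUES ME field is 0-based, hence the
                 * ring->me == 1 ? 0 : 1 translation below */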
4551                 /* map queues */
4552                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4553                 /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
4554                 amdgpu_ring_write(kiq_ring,
4555                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4556                 amdgpu_ring_write(kiq_ring,
4557                                   PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4558                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4559                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4560                                   PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4561                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4562                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4563                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4564                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4565         }
4566         /* write to scratch for completion */
4567         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4568         amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
4569         amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
4570         amdgpu_ring_commit(kiq_ring);
4571
4572         for (i = 0; i < adev->usec_timeout; i++) {
4573                 tmp = RREG32(scratch);
4574                 if (tmp == 0xDEADBEEF)
4575                         break;
4576                 DRM_UDELAY(1);
4577         }
4578         if (i >= adev->usec_timeout) {
4579                 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
4580                           scratch, tmp);
4581                 r = -EINVAL;
4582         }
4583         amdgpu_gfx_scratch_free(adev, scratch);
4584
4585         return r;
4586 }
4587
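/* Ask an active HQD to dequeue (req selects the dequeue request type; callers
 * here pass 2, presumably a reset-style request), wait for CP_HQD_ACTIVE to
 * clear, then scrub the queue pointers.
 */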
4588 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4589 {
4590         int i, r = 0;
4591
4592         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4593                 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4594                 for (i = 0; i < adev->usec_timeout; i++) {
4595                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4596                                 break;
4597                         udelay(1);
4598                 }
4599                 if (i == adev->usec_timeout)
4600                         r = -ETIMEDOUT;
4601         }
4602         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4603         WREG32(mmCP_HQD_PQ_RPTR, 0);
4604         WREG32(mmCP_HQD_PQ_WPTR, 0);
4605
4606         return r;
4607 }
4608
4609 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4610 {
4611         struct amdgpu_device *adev = ring->adev;
4612         struct vi_mqd *mqd = ring->mqd_ptr;
4613         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4614         uint32_t tmp;
4615
4616         mqd->header = 0xC0310800;
4617         mqd->compute_pipelinestat_enable = 0x00000001;
4618         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4619         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4620         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4621         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4622         mqd->compute_misc_reserved = 0x00000003;
4623         mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4624                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4625         mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4626                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4627         eop_base_addr = ring->eop_gpu_addr >> 8;
4628         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4629         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4630
4631         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4632         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4633         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4634                         (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
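        /* worked example: GFX8_MEC_HPD_SIZE = 2048 bytes = 512 dwords, so
         * EOP_SIZE = order_base_2(512) - 1 = 8 and 2^(8+1) = 512 dwords,
         * matching the encoding described above */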
4635
4636         mqd->cp_hqd_eop_control = tmp;
4637
4638         /* enable doorbell? */
4639         tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4640                             CP_HQD_PQ_DOORBELL_CONTROL,
4641                             DOORBELL_EN,
4642                             ring->use_doorbell ? 1 : 0);
4643
4644         mqd->cp_hqd_pq_doorbell_control = tmp;
4645
4646         /* set the pointer to the MQD */
4647         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4648         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4649
4650         /* set MQD vmid to 0 */
4651         tmp = RREG32(mmCP_MQD_CONTROL);
4652         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4653         mqd->cp_mqd_control = tmp;
4654
4655         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4656         hqd_gpu_addr = ring->gpu_addr >> 8;
4657         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4658         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4659
4660         /* set up the HQD, this is similar to CP_RB0_CNTL */
4661         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4662         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4663                             (order_base_2(ring->ring_size / 4) - 1));
4664         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4665                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4666 #ifdef __BIG_ENDIAN
4667         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4668 #endif
4669         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4670         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4671         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4672         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4673         mqd->cp_hqd_pq_control = tmp;
4674
4675         /* set the wb address whether it's enabled or not */
4676         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4677         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4678         mqd->cp_hqd_pq_rptr_report_addr_hi =
4679                 upper_32_bits(wb_gpu_addr) & 0xffff;
4680
4681         /* only used if CP_PQ_WPTR_POLL_CNTL.EN = 1 */
4682         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4683         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4684         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4685
4686         tmp = 0;
4687         /* enable the doorbell if requested */
4688         if (ring->use_doorbell) {
4689                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4690                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4691                                 DOORBELL_OFFSET, ring->doorbell_index);
4692
4693                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4694                                          DOORBELL_EN, 1);
4695                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4696                                          DOORBELL_SOURCE, 0);
4697                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4698                                          DOORBELL_HIT, 0);
4699         }
4700
4701         mqd->cp_hqd_pq_doorbell_control = tmp;
4702
4703         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4704         ring->wptr = 0;
4705         mqd->cp_hqd_pq_wptr = ring->wptr;
4706         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4707
4708         /* set the vmid for the queue */
4709         mqd->cp_hqd_vmid = 0;
4710
4711         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4712         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4713         mqd->cp_hqd_persistent_state = tmp;
4714
4715         /* set MTYPE */
4716         tmp = RREG32(mmCP_HQD_IB_CONTROL);
4717         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4718         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4719         mqd->cp_hqd_ib_control = tmp;
4720
4721         tmp = RREG32(mmCP_HQD_IQ_TIMER);
4722         tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4723         mqd->cp_hqd_iq_timer = tmp;
4724
4725         tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4726         tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4727         mqd->cp_hqd_ctx_save_control = tmp;
4728
4729         /* defaults */
4730         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4731         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4732         mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4733         mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4734         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4735         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4736         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4737         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4738         mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4739         mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4740         mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4741         mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4742         mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4743         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4744         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4745
4746         /* activate the queue */
4747         mqd->cp_hqd_active = 1;
4748
4749         return 0;
4750 }
4751
4752 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4753                         struct vi_mqd *mqd)
4754 {
4755         uint32_t mqd_reg;
4756         uint32_t *mqd_data;
4757
4758         /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4759         mqd_data = &mqd->cp_mqd_base_addr_lo;
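        /* the vi_mqd fields from cp_mqd_base_addr_lo onward mirror the HQD
         * register order, so (mqd_reg - mmCP_MQD_BASE_ADDR) indexes straight
         * into the struct */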
4760
4761         /* disable wptr polling */
4762         WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4763
4764         /* program all HQD registers */
4765         for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4766                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4767
4768         /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4769          * This is safe since EOP RPTR==WPTR for any inactive HQD
4770          * on ASICs that do not support context-save.
4771          * EOP writes/reads can start anywhere in the ring.
4772          */
4773         if (adev->asic_type != CHIP_TONGA) {
4774                 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4775                 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4776                 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4777         }
4778
4779         for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4780                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4781
4782         /* activate the HQD */
4783         for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4784                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4785
4786         return 0;
4787 }
4788
4789 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4790 {
4791         struct amdgpu_device *adev = ring->adev;
4792         struct vi_mqd *mqd = ring->mqd_ptr;
4793         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4794
4795         gfx_v8_0_kiq_setting(ring);
4796
4797         if (adev->in_gpu_reset) { /* for GPU_RESET case */
4798                 /* reset MQD to a clean status */
4799                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4800                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4801
4802                 /* reset ring buffer */
4803                 ring->wptr = 0;
4804                 amdgpu_ring_clear_ring(ring);
4805                 mutex_lock(&adev->srbm_mutex);
4806                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4807                 gfx_v8_0_mqd_commit(adev, mqd);
4808                 vi_srbm_select(adev, 0, 0, 0, 0);
4809                 mutex_unlock(&adev->srbm_mutex);
4810         } else {
4811                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4812                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4813                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4814                 mutex_lock(&adev->srbm_mutex);
4815                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4816                 gfx_v8_0_mqd_init(ring);
4817                 gfx_v8_0_mqd_commit(adev, mqd);
4818                 vi_srbm_select(adev, 0, 0, 0, 0);
4819                 mutex_unlock(&adev->srbm_mutex);
4820
4821                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4822                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4823         }
4824
4825         return 0;
4826 }
4827
4828 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4829 {
4830         struct amdgpu_device *adev = ring->adev;
4831         struct vi_mqd *mqd = ring->mqd_ptr;
4832         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4833
4834         if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
4835                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4836                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4837                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4838                 mutex_lock(&adev->srbm_mutex);
4839                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4840                 gfx_v8_0_mqd_init(ring);
4841                 vi_srbm_select(adev, 0, 0, 0, 0);
4842                 mutex_unlock(&adev->srbm_mutex);
4843
4844                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4845                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4846         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4847                 /* reset MQD to a clean status */
4848                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4849                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4850                 /* reset ring buffer */
4851                 ring->wptr = 0;
4852                 amdgpu_ring_clear_ring(ring);
4853         } else {
4854                 amdgpu_ring_clear_ring(ring);
4855         }
4856         return 0;
4857 }
4858
4859 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4860 {
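        /* the range registers appear to take byte offsets, hence the dword
         * doorbell indices shifted left by 2 (an assumption based on the
         * << 2 below) */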
4861         if (adev->asic_type > CHIP_TONGA) {
4862                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
4863                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
4864         }
4865         /* enable doorbells */
4866         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4867 }
4868
4869 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4870 {
4871         struct amdgpu_ring *ring = NULL;
4872         int r = 0, i;
4873
4874         gfx_v8_0_cp_compute_enable(adev, true);
4875
4876         ring = &adev->gfx.kiq.ring;
4877
4878         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4879         if (unlikely(r != 0))
4880                 goto done;
4881
4882         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4883         if (!r) {
4884                 r = gfx_v8_0_kiq_init_queue(ring);
4885                 amdgpu_bo_kunmap(ring->mqd_obj);
4886                 ring->mqd_ptr = NULL;
4887         }
4888         amdgpu_bo_unreserve(ring->mqd_obj);
4889         if (r)
4890                 goto done;
4891
4892         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4893                 ring = &adev->gfx.compute_ring[i];
4894
4895                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4896                 if (unlikely(r != 0))
4897                         goto done;
4898                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4899                 if (!r) {
4900                         r = gfx_v8_0_kcq_init_queue(ring);
4901                         amdgpu_bo_kunmap(ring->mqd_obj);
4902                         ring->mqd_ptr = NULL;
4903                 }
4904                 amdgpu_bo_unreserve(ring->mqd_obj);
4905                 if (r)
4906                         goto done;
4907         }
4908
4909         gfx_v8_0_set_mec_doorbell_range(adev);
4910
4911         r = gfx_v8_0_kiq_kcq_enable(adev);
4912         if (r)
4913                 goto done;
4914
4915         /* Test KIQ */
4916         ring = &adev->gfx.kiq.ring;
4917         ring->ready = true;
4918         r = amdgpu_ring_test_ring(ring);
4919         if (r) {
4920                 ring->ready = false;
4921                 goto done;
4922         }
4923
4924         /* Test KCQs */
4925         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4926                 ring = &adev->gfx.compute_ring[i];
4927                 ring->ready = true;
4928                 r = amdgpu_ring_test_ring(ring);
4929                 if (r)
4930                         ring->ready = false;
4931         }
4932
4933 done:
4934         return r;
4935 }
4936
4937 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4938 {
4939         int r;
4940
4941         if (!(adev->flags & AMD_IS_APU))
4942                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4943
4944         if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
4945                 /* legacy firmware loading */
4946                 r = gfx_v8_0_cp_gfx_load_microcode(adev);
4947                 if (r)
4948                         return r;
4949
4950                 r = gfx_v8_0_cp_compute_load_microcode(adev);
4951                 if (r)
4952                         return r;
4953         }
4954
4955         r = gfx_v8_0_cp_gfx_resume(adev);
4956         if (r)
4957                 return r;
4958
4959         r = gfx_v8_0_kiq_resume(adev);
4960         if (r)
4961                 return r;
4962
4963         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4964
4965         return 0;
4966 }
4967
4968 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4969 {
4970         gfx_v8_0_cp_gfx_enable(adev, enable);
4971         gfx_v8_0_cp_compute_enable(adev, enable);
4972 }
4973
4974 static int gfx_v8_0_hw_init(void *handle)
4975 {
4976         int r;
4977         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4978
4979         gfx_v8_0_init_golden_registers(adev);
4980         gfx_v8_0_gpu_init(adev);
4981
4982         r = gfx_v8_0_rlc_resume(adev);
4983         if (r)
4984                 return r;
4985
4986         r = gfx_v8_0_cp_resume(adev);
4987
4988         return r;
4989 }
4990
4991 static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring, struct amdgpu_ring *ring)
4992 {
4993         struct amdgpu_device *adev = kiq_ring->adev;
4994         uint32_t scratch, tmp = 0;
4995         int r, i;
4996
4997         r = amdgpu_gfx_scratch_get(adev, &scratch);
4998         if (r) {
4999                 DRM_ERROR("Failed to get scratch reg (%d).\n", r);
5000                 return r;
5001         }
5002         WREG32(scratch, 0xCAFEDEAD);
5003
5004         r = amdgpu_ring_alloc(kiq_ring, 10);
5005         if (r) {
5006                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
5007                 amdgpu_gfx_scratch_free(adev, scratch);
5008                 return r;
5009         }
5010
5011         /* unmap queues */
5012         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
5013         amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
5014                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
5015                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
5016                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
5017                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
5018         amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
5019         amdgpu_ring_write(kiq_ring, 0);
5020         amdgpu_ring_write(kiq_ring, 0);
5021         amdgpu_ring_write(kiq_ring, 0);
5022         /* write to scratch for completion */
5023         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
5024         amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
5025         amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
5026         amdgpu_ring_commit(kiq_ring);
5027
5028         for (i = 0; i < adev->usec_timeout; i++) {
5029                 tmp = RREG32(scratch);
5030                 if (tmp == 0xDEADBEEF)
5031                         break;
5032                 DRM_UDELAY(1);
5033         }
5034         if (i >= adev->usec_timeout) {
5035                 DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp);
5036                 r = -EINVAL;
5037         }
5038         amdgpu_gfx_scratch_free(adev, scratch);
5039         return r;
5040 }
5041
5042 static int gfx_v8_0_hw_fini(void *handle)
5043 {
5044         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5045         int i;
5046
5047         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5048         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5049
5050         /* disable KCQs so the CPC stops touching memory that may no longer be valid */
5051         for (i = 0; i < adev->gfx.num_compute_rings; i++)
5052                 gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
5053
5054         if (amdgpu_sriov_vf(adev)) {
5055                 pr_debug("For SRIOV client, nothing to do here\n");
5056                 return 0;
5057         }
5058         gfx_v8_0_cp_enable(adev, false);
5059         gfx_v8_0_rlc_stop(adev);
5060
5061         amdgpu_device_ip_set_powergating_state(adev,
5062                                                AMD_IP_BLOCK_TYPE_GFX,
5063                                                AMD_PG_STATE_UNGATE);
5064
5065         return 0;
5066 }
5067
5068 static int gfx_v8_0_suspend(void *handle)
5069 {
5070         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5071         adev->gfx.in_suspend = true;
5072         return gfx_v8_0_hw_fini(adev);
5073 }
5074
5075 static int gfx_v8_0_resume(void *handle)
5076 {
5077         int r;
5078         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5079
5080         r = gfx_v8_0_hw_init(adev);
5081         adev->gfx.in_suspend = false;
5082         return r;
5083 }
5084
5085 static bool gfx_v8_0_is_idle(void *handle)
5086 {
5087         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5088
5089         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5090                 return false;
5091         else
5092                 return true;
5093 }
5094
5095 static int gfx_v8_0_wait_for_idle(void *handle)
5096 {
5097         unsigned i;
5098         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5099
5100         for (i = 0; i < adev->usec_timeout; i++) {
5101                 if (gfx_v8_0_is_idle(handle))
5102                         return 0;
5103
5104                 udelay(1);
5105         }
5106         return -ETIMEDOUT;
5107 }
5108
5109 static bool gfx_v8_0_check_soft_reset(void *handle)
5110 {
5111         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5112         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5113         u32 tmp;
5114
5115         /* GRBM_STATUS */
5116         tmp = RREG32(mmGRBM_STATUS);
5117         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5118                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5119                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5120                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5121                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5122                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5123                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5124                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5125                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5126                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5127                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5128                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5129                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5130         }
5131
5132         /* GRBM_STATUS2 */
5133         tmp = RREG32(mmGRBM_STATUS2);
5134         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5135                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5136                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5137
5138         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5139             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5140             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5141                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5142                                                 SOFT_RESET_CPF, 1);
5143                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5144                                                 SOFT_RESET_CPC, 1);
5145                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5146                                                 SOFT_RESET_CPG, 1);
5147                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5148                                                 SOFT_RESET_GRBM, 1);
5149         }
5150
5151         /* SRBM_STATUS */
5152         tmp = RREG32(mmSRBM_STATUS);
5153         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5154                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5155                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5156         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5157                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5158                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5159
5160         if (grbm_soft_reset || srbm_soft_reset) {
5161                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5162                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5163                 return true;
5164         } else {
5165                 adev->gfx.grbm_soft_reset = 0;
5166                 adev->gfx.srbm_soft_reset = 0;
5167                 return false;
5168         }
5169 }
5170
5171 static int gfx_v8_0_pre_soft_reset(void *handle)
5172 {
5173         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5174         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5175
5176         if ((!adev->gfx.grbm_soft_reset) &&
5177             (!adev->gfx.srbm_soft_reset))
5178                 return 0;
5179
5180         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5181         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5182
5183         /* stop the rlc */
5184         gfx_v8_0_rlc_stop(adev);
5185
5186         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5187             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5188                 /* Disable GFX parsing/prefetching */
5189                 gfx_v8_0_cp_gfx_enable(adev, false);
5190
5191         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5192             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5193             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5194             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5195                 int i;
5196
5197                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5198                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5199
5200                         mutex_lock(&adev->srbm_mutex);
5201                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5202                         gfx_v8_0_deactivate_hqd(adev, 2);
5203                         vi_srbm_select(adev, 0, 0, 0, 0);
5204                         mutex_unlock(&adev->srbm_mutex);
5205                 }
5206                 /* Disable MEC parsing/prefetching */
5207                 gfx_v8_0_cp_compute_enable(adev, false);
5208         }
5209
5210         return 0;
5211 }
5212
5213 static int gfx_v8_0_soft_reset(void *handle)
5214 {
5215         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5216         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5217         u32 tmp;
5218
5219         if ((!adev->gfx.grbm_soft_reset) &&
5220             (!adev->gfx.srbm_soft_reset))
5221                 return 0;
5222
5223         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5224         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5225
5226         if (grbm_soft_reset || srbm_soft_reset) {
5227                 tmp = RREG32(mmGMCON_DEBUG);
5228                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5229                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5230                 WREG32(mmGMCON_DEBUG, tmp);
5231                 udelay(50);
5232         }
5233
5234         if (grbm_soft_reset) {
5235                 tmp = RREG32(mmGRBM_SOFT_RESET);
5236                 tmp |= grbm_soft_reset;
5237                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5238                 WREG32(mmGRBM_SOFT_RESET, tmp);
5239                 tmp = RREG32(mmGRBM_SOFT_RESET);
5240
5241                 udelay(50);
5242
5243                 tmp &= ~grbm_soft_reset;
5244                 WREG32(mmGRBM_SOFT_RESET, tmp);
5245                 tmp = RREG32(mmGRBM_SOFT_RESET);
5246         }
5247
5248         if (srbm_soft_reset) {
5249                 tmp = RREG32(mmSRBM_SOFT_RESET);
5250                 tmp |= srbm_soft_reset;
5251                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5252                 WREG32(mmSRBM_SOFT_RESET, tmp);
5253                 tmp = RREG32(mmSRBM_SOFT_RESET);
5254
5255                 udelay(50);
5256
5257                 tmp &= ~srbm_soft_reset;
5258                 WREG32(mmSRBM_SOFT_RESET, tmp);
5259                 tmp = RREG32(mmSRBM_SOFT_RESET);
5260         }
5261
5262         if (grbm_soft_reset || srbm_soft_reset) {
5263                 tmp = RREG32(mmGMCON_DEBUG);
5264                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5265                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5266                 WREG32(mmGMCON_DEBUG, tmp);
5267         }
5268
5269         /* Wait a little for things to settle down */
5270         udelay(50);
5271
5272         return 0;
5273 }
5274
5275 static int gfx_v8_0_post_soft_reset(void *handle)
5276 {
5277         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5278         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5279
5280         if ((!adev->gfx.grbm_soft_reset) &&
5281             (!adev->gfx.srbm_soft_reset))
5282                 return 0;
5283
5284         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5285         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5286
5287         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5288             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5289                 gfx_v8_0_cp_gfx_resume(adev);
5290
5291         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5292             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5293             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5294             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5295                 int i;
5296
5297                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5298                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5299
5300                         mutex_lock(&adev->srbm_mutex);
5301                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5302                         gfx_v8_0_deactivate_hqd(adev, 2);
5303                         vi_srbm_select(adev, 0, 0, 0, 0);
5304                         mutex_unlock(&adev->srbm_mutex);
5305                 }
5306                 gfx_v8_0_kiq_resume(adev);
5307         }
5308         gfx_v8_0_rlc_start(adev);
5309
5310         return 0;
5311 }
5312
5313 /**
5314  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5315  *
5316  * @adev: amdgpu_device pointer
5317  *
5318  * Fetches a GPU clock counter snapshot.
5319  * Returns the 64 bit clock counter snapshot.
5320  */
5321 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5322 {
5323         uint64_t clock;
5324
5325         mutex_lock(&adev->gfx.gpu_clock_mutex);
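        /* writing the capture register latches the free-running counter into
         * the LSB/MSB pair; the mutex keeps the latch and the two reads
         * atomic with respect to other callers */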
5326         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5327         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5328                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5329         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5330         return clock;
5331 }
5332
5333 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5334                                           uint32_t vmid,
5335                                           uint32_t gds_base, uint32_t gds_size,
5336                                           uint32_t gws_base, uint32_t gws_size,
5337                                           uint32_t oa_base, uint32_t oa_size)
5338 {
5339         gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5340         gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5341
5342         gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5343         gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5344
5345         oa_base = oa_base >> AMDGPU_OA_SHIFT;
5346         oa_size = oa_size >> AMDGPU_OA_SHIFT;
5347
5348         /* GDS Base */
5349         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5350         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5351                                 WRITE_DATA_DST_SEL(0)));
5352         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5353         amdgpu_ring_write(ring, 0);
5354         amdgpu_ring_write(ring, gds_base);
5355
5356         /* GDS Size */
5357         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5358         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5359                                 WRITE_DATA_DST_SEL(0)));
5360         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5361         amdgpu_ring_write(ring, 0);
5362         amdgpu_ring_write(ring, gds_size);
5363
5364         /* GWS */
5365         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5366         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5367                                 WRITE_DATA_DST_SEL(0)));
5368         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5369         amdgpu_ring_write(ring, 0);
5370         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5371
5372         /* OA */
5373         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5374         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5375                                 WRITE_DATA_DST_SEL(0)));
5376         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5377         amdgpu_ring_write(ring, 0);
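        /* build a contiguous mask of oa_size bits starting at bit oa_base:
         * (1 << (oa_size + oa_base)) - (1 << oa_base) */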
5378         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5379 }
5380
5381 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5382 {
5383         WREG32(mmSQ_IND_INDEX,
5384                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5385                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5386                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5387                 (SQ_IND_INDEX__FORCE_READ_MASK));
5388         return RREG32(mmSQ_IND_DATA);
5389 }
5390
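/* Like wave_read_ind(), but uses AUTO_INCR to read @num consecutive
 * dwords starting at @regno into @out.
 */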
5391 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5392                            uint32_t wave, uint32_t thread,
5393                            uint32_t regno, uint32_t num, uint32_t *out)
5394 {
5395         WREG32(mmSQ_IND_INDEX,
5396                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5397                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5398                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5399                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5400                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5401                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5402         while (num--)
5403                 *(out++) = RREG32(mmSQ_IND_DATA);
5404 }
5405
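/**
 * gfx_v8_0_read_wave_data - dump the state of a single wave
 *
 * @adev: amdgpu_device pointer
 * @simd: SIMD the wave runs on
 * @wave: wave slot to read
 * @dst: buffer the SQ_WAVE_* registers are written to
 * @no_fields: incremented for every dword stored in @dst
 *
 * Snapshots status, PC, EXEC mask, HW id, current instruction words,
 * GPR/LDS allocation, trap status and more for one wave.  The leading
 * 0 tags the buffer as "type 0" wave data for the consumer.
 */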
5406 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5407 {
5408         /* type 0 wave data */
5409         dst[(*no_fields)++] = 0;
5410         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5411         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5412         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5413         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5414         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5415         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5416         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5417         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5418         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5419         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5420         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5421         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5422         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5423         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5424         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5425         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5426         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5427         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5428 }
5429
5430 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5431                                      uint32_t wave, uint32_t start,
5432                                      uint32_t size, uint32_t *dst)
5433 {
5434         wave_read_regs(
5435                 adev, simd, wave, 0,
5436                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5437 }
5438
5440 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5441         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5442         .select_se_sh = &gfx_v8_0_select_se_sh,
5443         .read_wave_data = &gfx_v8_0_read_wave_data,
5444         .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5445 };
5446
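/* IP block callbacks.  Early init only wires up ring counts and the
 * gfx/ring/irq/rlc function tables; no hardware is touched yet.
 */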
5447 static int gfx_v8_0_early_init(void *handle)
5448 {
5449         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5450
5451         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5452         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5453         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5454         gfx_v8_0_set_ring_funcs(adev);
5455         gfx_v8_0_set_irq_funcs(adev);
5456         gfx_v8_0_set_gds_init(adev);
5457         gfx_v8_0_set_rlc_funcs(adev);
5458
5459         return 0;
5460 }
5461
5462 static int gfx_v8_0_late_init(void *handle)
5463 {
5464         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5465         int r;
5466
5467         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5468         if (r)
5469                 return r;
5470
5471         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5472         if (r)
5473                 return r;
5474
5475         /* requires IBs so do in late init after IB pool is initialized */
5476         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5477         if (r)
5478                 return r;
5479
5480         amdgpu_device_ip_set_powergating_state(adev,
5481                                                AMD_IP_BLOCK_TYPE_GFX,
5482                                                AMD_PG_STATE_GATE);
5483
5484         return 0;
5485 }
5486
5487 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5488                                                        bool enable)
5489 {
5490         if ((adev->asic_type == CHIP_POLARIS11) ||
5491             (adev->asic_type == CHIP_POLARIS12))
5492                 /* Send msg to SMU via Powerplay */
5493                 amdgpu_device_ip_set_powergating_state(adev,
5494                                                        AMD_IP_BLOCK_TYPE_SMC,
5495                                                        enable ?
5496                                                        AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5497
5498         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5499 }
5500
5501 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5502                                                         bool enable)
5503 {
5504         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5505 }
5506
5507 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5508                 bool enable)
5509 {
5510         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5511 }
5512
5513 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5514                                           bool enable)
5515 {
5516         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5517 }
5518
5519 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5520                                                 bool enable)
5521 {
5522         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5523
5524         /* Read any GFX register to wake up GFX. */
5525         if (!enable)
5526                 RREG32(mmDB_RENDER_CONTROL);
5527 }
5528
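/* Coarse-grain GFX power gating for Carrizo/Stoney: pipeline PG is only
 * layered on top of CG power gating, and both are torn down together on
 * disable.
 */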
5529 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5530                                           bool enable)
5531 {
5532         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5533                 cz_enable_gfx_cg_power_gating(adev, true);
5534                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5535                         cz_enable_gfx_pipeline_power_gating(adev, true);
5536         } else {
5537                 cz_enable_gfx_cg_power_gating(adev, false);
5538                 cz_enable_gfx_pipeline_power_gating(adev, false);
5539         }
5540 }
5541
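/* Apply the per-ASIC powergating policy.  SR-IOV guests return early,
 * as power gating is handled on the host side.
 */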
5542 static int gfx_v8_0_set_powergating_state(void *handle,
5543                                           enum amd_powergating_state state)
5544 {
5545         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5546         bool enable = (state == AMD_PG_STATE_GATE);
5547
5548         if (amdgpu_sriov_vf(adev))
5549                 return 0;
5550
5551         switch (adev->asic_type) {
5552         case CHIP_CARRIZO:
5553         case CHIP_STONEY:
5554
5555                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5556                         cz_enable_sck_slow_down_on_power_up(adev, true);
5557                         cz_enable_sck_slow_down_on_power_down(adev, true);
5558                 } else {
5559                         cz_enable_sck_slow_down_on_power_up(adev, false);
5560                         cz_enable_sck_slow_down_on_power_down(adev, false);
5561                 }
5562                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5563                         cz_enable_cp_power_gating(adev, true);
5564                 else
5565                         cz_enable_cp_power_gating(adev, false);
5566
5567                 cz_update_gfx_cg_power_gating(adev, enable);
5568
5569                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5570                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5571                 else
5572                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5573
5574                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5575                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5576                 else
5577                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5578                 break;
5579         case CHIP_POLARIS11:
5580         case CHIP_POLARIS12:
5581                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5582                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5583                 else
5584                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5585
5586                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5587                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5588                 else
5589                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5590
5591                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5592                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5593                 else
5594                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5595                 break;
5596         default:
5597                 break;
5598         }
5599
5600         return 0;
5601 }
5602
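/* Report the clockgating features that are currently active.  Note the
 * MGCG and CGTS override bits are active-low: the feature is on when
 * the override is clear, hence the negated tests.
 */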
5603 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5604 {
5605         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5606         int data;
5607
5608         if (amdgpu_sriov_vf(adev))
5609                 *flags = 0;
5610
5611         /* AMD_CG_SUPPORT_GFX_MGCG */
5612         data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5613         if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5614                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5615
5616         /* AMD_CG_SUPPORT_GFX_CGCG */
5617         data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5618         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5619                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5620
5621         /* AMD_CG_SUPPORT_GFX_CGLS */
5622         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5623                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5624
5625         /* AMD_CG_SUPPORT_GFX_CGTS */
5626         data = RREG32(mmCGTS_SM_CTRL_REG);
5627         if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5628                 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5629
5630         /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5631         if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5632                 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5633
5634         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5635         data = RREG32(mmRLC_MEM_SLP_CNTL);
5636         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5637                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5638
5639         /* AMD_CG_SUPPORT_GFX_CP_LS */
5640         data = RREG32(mmCP_MEM_SLP_CNTL);
5641         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5642                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5643 }
5644
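/* Broadcast a BPM command (@cmd) for register @reg_addr over the RLC
 * serdes links.  All SEs/SHs and both CU/non-CU master masks are
 * selected first so the command reaches every unit on the chip.
 */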
5645 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5646                                      uint32_t reg_addr, uint32_t cmd)
5647 {
5648         uint32_t data;
5649
5650         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5651
5652         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5653         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5654
5655         data = RREG32(mmRLC_SERDES_WR_CTRL);
5656         if (adev->asic_type == CHIP_STONEY)
5657                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5658                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5659                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5660                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5661                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5662                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5663                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5664                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5665                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5666         else
5667                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5668                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5669                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5670                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5671                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5672                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5673                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5674                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5675                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5676                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5677                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5678         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5679                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5680                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5681                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5682
5683         WREG32(mmRLC_SERDES_WR_CTRL, data);
5684 }
5685
5686 #define MSG_ENTER_RLC_SAFE_MODE     1
5687 #define MSG_EXIT_RLC_SAFE_MODE      0
5688 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5689 #define RLC_GPR_REG2__REQ__SHIFT 0
5690 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5691 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5692
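/* Request RLC safe mode and spin until the GFX clock and power status
 * bits report ready and the CMD bit is acked.  Only needed while CGCG
 * or MGCG is enabled; with the RLC disabled this is a no-op.
 */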
5693 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5694 {
5695         u32 data;
5696         unsigned i;
5697
5698         data = RREG32(mmRLC_CNTL);
5699         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5700                 return;
5701
5702         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5703                 data |= RLC_SAFE_MODE__CMD_MASK;
5704                 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5705                 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5706                 WREG32(mmRLC_SAFE_MODE, data);
5707
5708                 for (i = 0; i < adev->usec_timeout; i++) {
5709                         if ((RREG32(mmRLC_GPM_STAT) &
5710                              (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5711                               RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5712                             (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5713                              RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5714                                 break;
5715                         udelay(1);
5716                 }
5717
5718                 for (i = 0; i < adev->usec_timeout; i++) {
5719                         if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5720                                 break;
5721                         udelay(1);
5722                 }
5723                 adev->gfx.rlc.in_safe_mode = true;
5724         }
5725 }
5726
5727 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5728 {
5729         u32 data = 0;
5730         unsigned i;
5731
5732         data = RREG32(mmRLC_CNTL);
5733         if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5734                 return;
5735
5736         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5737                 if (adev->gfx.rlc.in_safe_mode) {
5738                         data |= RLC_SAFE_MODE__CMD_MASK;
5739                         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5740                         WREG32(mmRLC_SAFE_MODE, data);
5741                         adev->gfx.rlc.in_safe_mode = false;
5742                 }
5743         }
5744
5745         for (i = 0; i < adev->usec_timeout; i++) {
5746                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5747                         break;
5748                 udelay(1);
5749         }
5750 }
5751
5752 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5753         .enter_safe_mode = iceland_enter_rlc_safe_mode,
5754         .exit_safe_mode = iceland_exit_rlc_safe_mode
5755 };
5756
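/* Enable or disable MGCG/MGLS (and CGTS on top).  The numbered steps
 * below follow the programming sequence: memory light sleep first,
 * then the MGCG override bits, a serdes broadcast to commit them, and
 * finally the CGTS SM controls.  The whole sequence runs in RLC safe
 * mode.
 */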
5757 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5758                                                       bool enable)
5759 {
5760         uint32_t temp, data;
5761
5762         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5763
5764         /* It is disabled by HW by default */
5765         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5766                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5767                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5768                                 /* 1 - RLC memory Light sleep */
5769                                 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5770
5771                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5772                                 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5773                 }
5774
5775                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5776                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5777                 if (adev->flags & AMD_IS_APU)
5778                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5779                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5780                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5781                 else
5782                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5783                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5784                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5785                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5786
5787                 if (temp != data)
5788                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5789
5790                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5791                 gfx_v8_0_wait_for_rlc_serdes(adev);
5792
5793                 /* 5 - clear mgcg override */
5794                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5795
5796                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5797                         /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5798                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5799                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5800                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5801                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5802                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5803                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5804                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5805                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5806                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5807                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5808                         if (temp != data)
5809                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5810                 }
5811                 udelay(50);
5812
5813                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5814                 gfx_v8_0_wait_for_rlc_serdes(adev);
5815         } else {
5816                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5817                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5818                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5819                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5820                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5821                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5822                 if (temp != data)
5823                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5824
5825                 /* 2 - disable MGLS in RLC */
5826                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5827                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5828                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5829                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5830                 }
5831
5832                 /* 3 - disable MGLS in CP */
5833                 data = RREG32(mmCP_MEM_SLP_CNTL);
5834                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5835                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5836                         WREG32(mmCP_MEM_SLP_CNTL, data);
5837                 }
5838
5839                 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5840                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5841                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5842                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5843                 if (temp != data)
5844                         WREG32(mmCGTS_SM_CTRL_REG, data);
5845
5846                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5847                 gfx_v8_0_wait_for_rlc_serdes(adev);
5848
5849                 /* 6 - set mgcg override */
5850                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5851
5852                 udelay(50);
5853
5854                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5855                 gfx_v8_0_wait_for_rlc_serdes(adev);
5856         }
5857
5858         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5859 }
5860
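/* Enable or disable CGCG/CGLS.  The GUI idle interrupt is only kept
 * enabled while coarse-grain gating is active (and re-enabled at the
 * end of the disable path for PG), and CGLS is never left on without
 * CGCG.
 */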
5861 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5862                                                       bool enable)
5863 {
5864         uint32_t temp, temp1, data, data1;
5865
5866         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5867
5868         adev->gfx.rlc.funcs->enter_safe_mode(adev);
5869
5870         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5871                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5872                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5873                 if (temp1 != data1)
5874                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5875
5876                 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5877                 gfx_v8_0_wait_for_rlc_serdes(adev);
5878
5879                 /* 2 - clear cgcg override */
5880                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5881
5882                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5883                 gfx_v8_0_wait_for_rlc_serdes(adev);
5884
5885                 /* 3 - write cmd to set CGLS */
5886                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5887
5888                 /* 4 - enable cgcg */
5889                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5890
5891                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5892                         /* enable cgls */
5893                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5894
5895                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5896                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5897
5898                         if (temp1 != data1)
5899                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5900                 } else {
5901                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5902                 }
5903
5904                 if (temp != data)
5905                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5906
5907                 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
5908                  * Cmp_busy/GFX_Idle interrupts
5909                  */
5910                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5911         } else {
5912                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5913                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5914
5915                 /* TEST CGCG */
5916                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5917                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5918                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5919                 if (temp1 != data1)
5920                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5921
5922                 /* read gfx register to wake up cgcg */
5923                 RREG32(mmCB_CGTT_SCLK_CTRL);
5924                 RREG32(mmCB_CGTT_SCLK_CTRL);
5925                 RREG32(mmCB_CGTT_SCLK_CTRL);
5926                 RREG32(mmCB_CGTT_SCLK_CTRL);
5927
5928                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5929                 gfx_v8_0_wait_for_rlc_serdes(adev);
5930
5931                 /* write cmd to Set CGCG Override */
5932                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5933
5934                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5935                 gfx_v8_0_wait_for_rlc_serdes(adev);
5936
5937                 /* write cmd to Clear CGLS */
5938                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5939
5940                 /* disable cgcg, cgls should be disabled too. */
5941                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5942                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5943                 if (temp != data)
5944                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5945                 /* enable interrupts again for PG */
5946                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5947         }
5948
5949         gfx_v8_0_wait_for_rlc_serdes(adev);
5950
5951         adev->gfx.rlc.funcs->exit_safe_mode(adev);
5952 }

5953 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5954                                             bool enable)
5955 {
5956         if (enable) {
5957                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5958                  * ===  MGCG + MGLS + TS(CG/LS) ===
5959                  */
5960                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5961                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5962         } else {
5963                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5964                  * ===  CGCG + CGLS ===
5965                  */
5966                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5967                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5968         }
5969         return 0;
5970 }
5971
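/* On Tonga the SMU owns GFX clockgating: encode the supported CG and MG
 * state into PP_CG_MSG_IDs and hand them to powerplay instead of
 * writing the registers directly.
 */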
5972 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5973                                           enum amd_clockgating_state state)
5974 {
5975         uint32_t msg_id, pp_state = 0;
5976         uint32_t pp_support_state = 0;
5977
5978         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5979                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5980                         pp_support_state = PP_STATE_SUPPORT_LS;
5981                         pp_state = PP_STATE_LS;
5982                 }
5983                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5984                         pp_support_state |= PP_STATE_SUPPORT_CG;
5985                         pp_state |= PP_STATE_CG;
5986                 }
5987                 if (state == AMD_CG_STATE_UNGATE)
5988                         pp_state = 0;
5989
5990                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5991                                 PP_BLOCK_GFX_CG,
5992                                 pp_support_state,
5993                                 pp_state);
5994                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5995                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5996         }
5997
5998         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5999                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6000                         pp_support_state = PP_STATE_SUPPORT_LS;
6001                         pp_state = PP_STATE_LS;
6002                 }
6003
6004                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6005                         pp_support_state |= PP_STATE_SUPPORT_CG;
6006                         pp_state |= PP_STATE_CG;
6007                 }
6008
6009                 if (state == AMD_CG_STATE_UNGATE)
6010                         pp_state = 0;
6011
6012                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6013                                 PP_BLOCK_GFX_MG,
6014                                 pp_support_state,
6015                                 pp_state);
6016                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6017                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6018         }
6019
6020         return 0;
6021 }
6022
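/* Polaris uses the same SMU-mediated scheme as Tonga, with additional
 * messages for the 3D, RLC and CP blocks.
 */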
6023 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6024                                           enum amd_clockgating_state state)
6025 {
6027         uint32_t msg_id, pp_state = 0;
6028         uint32_t pp_support_state = 0;
6029
6030         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6031                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6032                         pp_support_state = PP_STATE_SUPPORT_LS;
6033                         pp_state = PP_STATE_LS;
6034                 }
6035                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6036                         pp_support_state |= PP_STATE_SUPPORT_CG;
6037                         pp_state |= PP_STATE_CG;
6038                 }
6039                 if (state == AMD_CG_STATE_UNGATE)
6040                         pp_state = 0;
6041
6042                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6043                                 PP_BLOCK_GFX_CG,
6044                                 pp_support_state,
6045                                 pp_state);
6046                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6047                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6048         }
6049
6050         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6051                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6052                         pp_support_state = PP_STATE_SUPPORT_LS;
6053                         pp_state = PP_STATE_LS;
6054                 }
6055                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6056                         pp_support_state |= PP_STATE_SUPPORT_CG;
6057                         pp_state |= PP_STATE_CG;
6058                 }
6059                 if (state == AMD_CG_STATE_UNGATE)
6060                         pp_state = 0;
6061
6062                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6063                                 PP_BLOCK_GFX_3D,
6064                                 pp_support_state,
6065                                 pp_state);
6066                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6067                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6068         }
6069
6070         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6071                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6072                         pp_support_state = PP_STATE_SUPPORT_LS;
6073                         pp_state = PP_STATE_LS;
6074                 }
6075
6076                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6077                         pp_support_state |= PP_STATE_SUPPORT_CG;
6078                         pp_state |= PP_STATE_CG;
6079                 }
6080
6081                 if (state == AMD_CG_STATE_UNGATE)
6082                         pp_state = 0;
6083
6084                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6085                                 PP_BLOCK_GFX_MG,
6086                                 pp_support_state,
6087                                 pp_state);
6088                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6089                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6090         }
6091
6092         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6093                 pp_support_state = PP_STATE_SUPPORT_LS;
6094
6095                 if (state == AMD_CG_STATE_UNGATE)
6096                         pp_state = 0;
6097                 else
6098                         pp_state = PP_STATE_LS;
6099
6100                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6101                                 PP_BLOCK_GFX_RLC,
6102                                 pp_support_state,
6103                                 pp_state);
6104                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6105                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6106         }
6107
6108         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6109                 pp_support_state = PP_STATE_SUPPORT_LS;
6110
6111                 if (state == AMD_CG_STATE_UNGATE)
6112                         pp_state = 0;
6113                 else
6114                         pp_state = PP_STATE_LS;
6115                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6116                         PP_BLOCK_GFX_CP,
6117                         pp_support_state,
6118                         pp_state);
6119                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6120                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6121         }
6122
6123         return 0;
6124 }
6125
6126 static int gfx_v8_0_set_clockgating_state(void *handle,
6127                                           enum amd_clockgating_state state)
6128 {
6129         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6130
6131         if (amdgpu_sriov_vf(adev))
6132                 return 0;
6133
6134         switch (adev->asic_type) {
6135         case CHIP_FIJI:
6136         case CHIP_CARRIZO:
6137         case CHIP_STONEY:
6138                 gfx_v8_0_update_gfx_clock_gating(adev,
6139                                                  state == AMD_CG_STATE_GATE);
6140                 break;
6141         case CHIP_TONGA:
6142                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6143                 break;
6144         case CHIP_POLARIS10:
6145         case CHIP_POLARIS11:
6146         case CHIP_POLARIS12:
6147                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6148                 break;
6149         default:
6150                 break;
6151         }
6152         return 0;
6153 }
6154
6155 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6156 {
6157         return ring->adev->wb.wb[ring->rptr_offs];
6158 }
6159
6160 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6161 {
6162         struct amdgpu_device *adev = ring->adev;
6163
6164         if (ring->use_doorbell)
6165                 /* XXX check if swapping is necessary on BE */
6166                 return ring->adev->wb.wb[ring->wptr_offs];
6167         else
6168                 return RREG32(mmCP_RB0_WPTR);
6169 }
6170
6171 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6172 {
6173         struct amdgpu_device *adev = ring->adev;
6174
6175         if (ring->use_doorbell) {
6176                 /* XXX check if swapping is necessary on BE */
6177                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6178                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6179         } else {
6180                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6181                 (void)RREG32(mmCP_RB0_WPTR);
6182         }
6183 }
6184
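/* Emit an HDP flush: request it via GPU_HDP_FLUSH_REQ and poll the
 * matching done bit with WAIT_REG_MEM.  Compute rings use their
 * per-pipe CPn done bit; the GFX ring uses CP0 and waits on the PFP
 * engine.
 */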
6185 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6186 {
6187         u32 ref_and_mask, reg_mem_engine;
6188
6189         if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6190             (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6191                 switch (ring->me) {
6192                 case 1:
6193                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6194                         break;
6195                 case 2:
6196                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6197                         break;
6198                 default:
6199                         return;
6200                 }
6201                 reg_mem_engine = 0;
6202         } else {
6203                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6204                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6205         }
6206
6207         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6208         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6209                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
6210                                  reg_mem_engine));
6211         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6212         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6213         amdgpu_ring_write(ring, ref_and_mask);
6214         amdgpu_ring_write(ring, ref_and_mask);
6215         amdgpu_ring_write(ring, 0x20); /* poll interval */
6216 }
6217
6218 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6219 {
6220         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6221         amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6222                 EVENT_INDEX(4));
6223
6224         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6225         amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6226                 EVENT_INDEX(0));
6227 }
6228
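/* Emit an indirect buffer on the GFX ring.  CE IBs use the CONST
 * variant of the packet; under SR-IOV, preemptible DE IBs also get
 * PRE_ENB set and DE meta data emitted ahead of them.
 */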
6229 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6230                                       struct amdgpu_ib *ib,
6231                                       unsigned vmid, bool ctx_switch)
6232 {
6233         u32 header, control = 0;
6234
6235         if (ib->flags & AMDGPU_IB_FLAG_CE)
6236                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6237         else
6238                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6239
6240         control |= ib->length_dw | (vmid << 24);
6241
6242         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6243                 control |= INDIRECT_BUFFER_PRE_ENB(1);
6244
6245                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6246                         gfx_v8_0_ring_emit_de_meta(ring);
6247         }
6248
6249         amdgpu_ring_write(ring, header);
6250         amdgpu_ring_write(ring,
6251 #ifdef __BIG_ENDIAN
6252                           (2 << 0) |
6253 #endif
6254                           (ib->gpu_addr & 0xFFFFFFFC));
6255         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6256         amdgpu_ring_write(ring, control);
6257 }
6258
6259 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6260                                           struct amdgpu_ib *ib,
6261                                           unsigned vmid, bool ctx_switch)
6262 {
6263         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6264
6265         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6266         amdgpu_ring_write(ring,
6267 #ifdef __BIG_ENDIAN
6268                                 (2 << 0) |
6269 #endif
6270                                 (ib->gpu_addr & 0xFFFFFFFC));
6271         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6272         amdgpu_ring_write(ring, control);
6273 }
6274
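/* Emit a GFX fence: EVENT_WRITE_EOP flushes the TC/TCL1 caches, writes
 * the 32- or 64-bit @seq to @addr and optionally raises an interrupt.
 */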
6275 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6276                                          u64 seq, unsigned flags)
6277 {
6278         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6279         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6280
6281         /* EVENT_WRITE_EOP - flush caches, send int */
6282         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6283         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6284                                  EOP_TC_ACTION_EN |
6285                                  EOP_TC_WB_ACTION_EN |
6286                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6287                                  EVENT_INDEX(5)));
6288         amdgpu_ring_write(ring, addr & 0xfffffffc);
6289         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6290                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6291         amdgpu_ring_write(ring, lower_32_bits(seq));
6292         amdgpu_ring_write(ring, upper_32_bits(seq));
6294 }
6295
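/* Make the ring wait until the last synced fence value shows up in
 * memory.  GFX rings wait on the PFP so prefetched commands stall too;
 * compute rings wait on the ME.
 */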
6296 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6297 {
6298         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6299         uint32_t seq = ring->fence_drv.sync_seq;
6300         uint64_t addr = ring->fence_drv.gpu_addr;
6301
6302         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6303         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6304                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6305                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6306         amdgpu_ring_write(ring, addr & 0xfffffffc);
6307         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6308         amdgpu_ring_write(ring, seq);
6309         amdgpu_ring_write(ring, 0xffffffff);
6310         amdgpu_ring_write(ring, 4); /* poll interval */
6311 }
6312
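/* Emit a VM TLB flush for @vmid, followed by a WAIT_REG_MEM on
 * VM_INVALIDATE_REQUEST to give the invalidate time to complete.  GFX
 * rings additionally sync PFP to ME so no stale prefetched reads cross
 * the page table switch.
 */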
6313 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6314                                         unsigned vmid, uint64_t pd_addr)
6315 {
6316         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6317
6318         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6319
6320         /* wait for the invalidate to complete */
6321         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6322         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6323                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6324                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6325         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6326         amdgpu_ring_write(ring, 0);
6327         amdgpu_ring_write(ring, 0); /* ref */
6328         amdgpu_ring_write(ring, 0); /* mask */
6329         amdgpu_ring_write(ring, 0x20); /* poll interval */
6330
6331         /* compute doesn't have PFP */
6332         if (usepfp) {
6333                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6334                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6335                 amdgpu_ring_write(ring, 0x0);
6336         }
6337 }
6338
6339 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6340 {
6341         return ring->adev->wb.wb[ring->wptr_offs];
6342 }
6343
6344 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6345 {
6346         struct amdgpu_device *adev = ring->adev;
6347
6348         /* XXX check if swapping is necessary on BE */
6349         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6350         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6351 }
6352
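/* Set a pipe's share of wave launch capacity in SPI_WCL_PIPE_PERCENT_*:
 * the full VALUE when it holds a reservation, the minimum (1)
 * otherwise.  The first ME only exposes the GFX and HP3D entries.
 */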
6353 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6354                                            bool acquire)
6355 {
6356         struct amdgpu_device *adev = ring->adev;
6357         int pipe_num, tmp, reg;
6358         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6359
6360         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6361
6362         /* first me only has 2 entries, GFX and HP3D */
6363         if (ring->me > 0)
6364                 pipe_num -= 2;
6365
6366         reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6367         tmp = RREG32(reg);
6368         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6369         WREG32(reg, tmp);
6370 }
6371
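/* Track high-priority pipe reservations in pipe_reserve_bitmap.  While
 * any reservation is held, every pipe without one is throttled to the
 * minimum; once the last reservation is released, all pipes are
 * restored to full rate.
 */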
6372 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6373                                             struct amdgpu_ring *ring,
6374                                             bool acquire)
6375 {
6376         int i, pipe;
6377         bool reserve;
6378         struct amdgpu_ring *iring;
6379
6380         mutex_lock(&adev->gfx.pipe_reserve_mutex);
6381         pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6382         if (acquire)
6383                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6384         else
6385                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6386
6387         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6388                 /* Clear all reservations - everyone reacquires all resources */
6389                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6390                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6391                                                        true);
6392
6393                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6394                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6395                                                        true);
6396         } else {
6397                 /* Lower all pipes without a current reservation */
6398                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6399                         iring = &adev->gfx.gfx_ring[i];
6400                         pipe = amdgpu_gfx_queue_to_bit(adev,
6401                                                        iring->me,
6402                                                        iring->pipe,
6403                                                        0);
6404                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6405                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6406                 }
6407
6408                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6409                         iring = &adev->gfx.compute_ring[i];
6410                         pipe = amdgpu_gfx_queue_to_bit(adev,
6411                                                        iring->me,
6412                                                        iring->pipe,
6413                                                        0);
6414                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6415                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6416                 }
6417         }
6418
6419         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6420 }
6421
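/* Raise or restore the HQD pipe/queue priority of @ring through the
 * SRBM-selected registers (0x2/0xf on acquire, 0 on release).
 */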
6422 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6423                                       struct amdgpu_ring *ring,
6424                                       bool acquire)
6425 {
6426         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6427         uint32_t queue_priority = acquire ? 0xf : 0x0;
6428
6429         mutex_lock(&adev->srbm_mutex);
6430         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6431
6432         WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6433         WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6434
6435         vi_srbm_select(adev, 0, 0, 0, 0);
6436         mutex_unlock(&adev->srbm_mutex);
6437 }

6438 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6439                                                enum drm_sched_priority priority)
6440 {
6441         struct amdgpu_device *adev = ring->adev;
6442         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6443
6444         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6445                 return;
6446
6447         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6448         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6449 }
6450
6451 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6452                                              u64 addr, u64 seq,
6453                                              unsigned flags)
6454 {
6455         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6456         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6457
6458         /* RELEASE_MEM - flush caches, send int */
6459         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6460         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6461                                  EOP_TC_ACTION_EN |
6462                                  EOP_TC_WB_ACTION_EN |
6463                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6464                                  EVENT_INDEX(5)));
6465         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6466         amdgpu_ring_write(ring, addr & 0xfffffffc);
6467         amdgpu_ring_write(ring, upper_32_bits(addr));
6468         amdgpu_ring_write(ring, lower_32_bits(seq));
6469         amdgpu_ring_write(ring, upper_32_bits(seq));
6470 }
6471
6472 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6473                                          u64 seq, unsigned int flags)
6474 {
6475         /* we only allocate 32bit for each seq wb address */
6476         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6477
6478         /* write fence seq to the "addr" */
6479         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6480         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6481                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6482         amdgpu_ring_write(ring, lower_32_bits(addr));
6483         amdgpu_ring_write(ring, upper_32_bits(addr));
6484         amdgpu_ring_write(ring, lower_32_bits(seq));
6485
6486         if (flags & AMDGPU_FENCE_FLAG_INT) {
6487                 /* set register to trigger INT */
6488                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6489                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6490                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6491                 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6492                 amdgpu_ring_write(ring, 0);
6493                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6494         }
6495 }
6496
6497 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6498 {
6499         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6500         amdgpu_ring_write(ring, 0);
6501 }
6502
6503 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6504 {
6505         uint32_t dw2 = 0;
6506
6507         if (amdgpu_sriov_vf(ring->adev))
6508                 gfx_v8_0_ring_emit_ce_meta(ring);
6509
6510         dw2 |= 0x80000000; /* set load_enable, otherwise this packet is just NOPs */
6511         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6512                 gfx_v8_0_ring_emit_vgt_flush(ring);
6513                 /* set load_global_config & load_global_uconfig */
6514                 dw2 |= 0x8001;
6515                 /* set load_cs_sh_regs */
6516                 dw2 |= 0x01000000;
6517                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6518                 dw2 |= 0x10002;
6519
6520                 /* set load_ce_ram if preamble presented */
6521                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6522                         dw2 |= 0x10000000;
6523         } else {
6524                 /* still load_ce_ram if this is the first time the preamble is
6525                  * presented, even though no context switch happens.
6526                  */
6527                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6528                         dw2 |= 0x10000000;
6529         }
6530
6531         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6532         amdgpu_ring_write(ring, dw2);
6533         amdgpu_ring_write(ring, 0);
6534 }
6535
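/* Emit a COND_EXEC whose dword count is patched in later: the returned
 * ring offset points at the dummy count that
 * gfx_v8_0_ring_emit_patch_cond_exec() overwrites with the number of
 * dwords to skip when *cond_exe_gpu_addr reads 0.
 */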
6536 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6537 {
6538         unsigned ret;
6539
6540         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6541         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6542         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6543         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
6544         ret = ring->wptr & ring->buf_mask;
6545         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6546         return ret;
6547 }
6548
6549 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6550 {
6551         unsigned cur;
6552
6553         BUG_ON(offset > ring->buf_mask);
6554         BUG_ON(ring->ring[offset] != 0x55aa55aa);
6555
6556         cur = (ring->wptr & ring->buf_mask) - 1;
6557         if (likely(cur > offset))
6558                 ring->ring[offset] = cur - offset;
6559         else
6560                 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6561 }
6562
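/* Read @reg through the CP by copying it into the writeback slot at
 * virt.reg_val_offs; this is how register reads are routed through the
 * KIQ under SR-IOV.
 */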
6563 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6564 {
6565         struct amdgpu_device *adev = ring->adev;
6566
6567         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6568         amdgpu_ring_write(ring, 0 |     /* src: register*/
6569                                 (5 << 8) |      /* dst: memory */
6570                                 (1 << 20));     /* write confirm */
6571         amdgpu_ring_write(ring, reg);
6572         amdgpu_ring_write(ring, 0);
6573         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6574                                 adev->virt.reg_val_offs * 4));
6575         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6576                                 adev->virt.reg_val_offs * 4));
6577 }
6578
6579 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6580                                   uint32_t val)
6581 {
6582         uint32_t cmd;
6583
6584         switch (ring->funcs->type) {
6585         case AMDGPU_RING_TYPE_GFX:
6586                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6587                 break;
6588         case AMDGPU_RING_TYPE_KIQ:
6589                 cmd = 1 << 16; /* no inc addr */
6590                 break;
6591         default:
6592                 cmd = WR_CONFIRM;
6593                 break;
6594         }
6595
6596         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6597         amdgpu_ring_write(ring, cmd);
6598         amdgpu_ring_write(ring, reg);
6599         amdgpu_ring_write(ring, 0);
6600         amdgpu_ring_write(ring, val);
6601 }
6602
6603 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6604                                                  enum amdgpu_interrupt_state state)
6605 {
6606         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6607                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6608 }
6609
6610 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6611                                                      int me, int pipe,
6612                                                      enum amdgpu_interrupt_state state)
6613 {
6614         u32 mec_int_cntl, mec_int_cntl_reg;
6615
6616         /*
6617          * amdgpu controls only the first MEC. That's why this function only
6618          * handles the setting of interrupts for this specific MEC. All other
6619          * pipes' interrupts are set by amdkfd.
6620          */
6621
6622         if (me == 1) {
6623                 switch (pipe) {
6624                 case 0:
6625                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6626                         break;
6627                 case 1:
6628                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6629                         break;
6630                 case 2:
6631                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6632                         break;
6633                 case 3:
6634                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6635                         break;
6636                 default:
6637                         DRM_DEBUG("invalid pipe %d\n", pipe);
6638                         return;
6639                 }
6640         } else {
6641                 DRM_DEBUG("invalid me %d\n", me);
6642                 return;
6643         }
6644
6645         switch (state) {
6646         case AMDGPU_IRQ_STATE_DISABLE:
6647                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6648                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6649                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6650                 break;
6651         case AMDGPU_IRQ_STATE_ENABLE:
6652                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6653                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6654                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6655                 break;
6656         default:
6657                 break;
6658         }
6659 }
6660
6661 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6662                                              struct amdgpu_irq_src *source,
6663                                              unsigned type,
6664                                              enum amdgpu_interrupt_state state)
6665 {
6666         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6667                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6668
6669         return 0;
6670 }
6671
6672 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6673                                               struct amdgpu_irq_src *source,
6674                                               unsigned type,
6675                                               enum amdgpu_interrupt_state state)
6676 {
6677         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6678                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6679
6680         return 0;
6681 }
6682
6683 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6684                                             struct amdgpu_irq_src *src,
6685                                             unsigned type,
6686                                             enum amdgpu_interrupt_state state)
6687 {
6688         switch (type) {
6689         case AMDGPU_CP_IRQ_GFX_EOP:
6690                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6691                 break;
6692         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6693                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6694                 break;
6695         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6696                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6697                 break;
6698         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6699                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6700                 break;
6701         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6702                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6703                 break;
6704         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6705                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6706                 break;
6707         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6708                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6709                 break;
6710         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6711                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6712                 break;
6713         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6714                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6715                 break;
6716         default:
6717                 break;
6718         }
6719         return 0;
6720 }
6721
6722 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6723                             struct amdgpu_irq_src *source,
6724                             struct amdgpu_iv_entry *entry)
6725 {
6726         int i;
6727         u8 me_id, pipe_id, queue_id;
6728         struct amdgpu_ring *ring;
6729
6730         DRM_DEBUG("IH: CP EOP\n");
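        /* Decode the interrupt source from ring_id:
         * bits [1:0] = pipe, bits [3:2] = me, bits [6:4] = queue.
         */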
6731         me_id = (entry->ring_id & 0x0c) >> 2;
6732         pipe_id = (entry->ring_id & 0x03) >> 0;
6733         queue_id = (entry->ring_id & 0x70) >> 4;
6734
6735         switch (me_id) {
6736         case 0:
6737                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6738                 break;
6739         case 1:
6740         case 2:
6741                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6742                         ring = &adev->gfx.compute_ring[i];
6743                         /* Per-queue interrupts are supported for MEC starting from VI;
6744                          * they can only be enabled/disabled per pipe, not per queue.
6745                          */
6746                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6747                                 amdgpu_fence_process(ring);
6748                 }
6749                 break;
6750         }
6751         return 0;
6752 }
6753
6754 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6755                                  struct amdgpu_irq_src *source,
6756                                  struct amdgpu_iv_entry *entry)
6757 {
6758         DRM_ERROR("Illegal register access in command stream\n");
6759         schedule_work(&adev->reset_work);
6760         return 0;
6761 }
6762
6763 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6764                                   struct amdgpu_irq_src *source,
6765                                   struct amdgpu_iv_entry *entry)
6766 {
6767         DRM_ERROR("Illegal instruction in command stream\n");
6768         schedule_work(&adev->reset_work);
6769         return 0;
6770 }
6771
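/* Toggle the GENERIC2 interrupt used for KIQ fence processing, both in
 * CPC_INT_CNTL and in the INT_CNTL register of the pipe the KIQ ring
 * is assigned to.
 */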
6772 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6773                                             struct amdgpu_irq_src *src,
6774                                             unsigned int type,
6775                                             enum amdgpu_interrupt_state state)
6776 {
6777         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6778
6779         switch (type) {
6780         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6781                 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
6782                              state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6783                 if (ring->me == 1)
6784                         WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
6785                                      ring->pipe,
6786                                      GENERIC2_INT_ENABLE,
6787                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6788                 else
6789                         WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
6790                                      ring->pipe,
6791                                      GENERIC2_INT_ENABLE,
6792                                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6793                 break;
6794         default:
6795                 BUG(); /* KIQ only supports GENERIC2_INT for now */
6796                 break;
6797         }
6798         return 0;
6799 }
6800
6801 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6802                             struct amdgpu_irq_src *source,
6803                             struct amdgpu_iv_entry *entry)
6804 {
6805         u8 me_id, pipe_id, queue_id;
6806         struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
6807
6808         me_id = (entry->ring_id & 0x0c) >> 2;
6809         pipe_id = (entry->ring_id & 0x03) >> 0;
6810         queue_id = (entry->ring_id & 0x70) >> 4;
6811         DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6812                    me_id, pipe_id, queue_id);
6813
6814         amdgpu_fence_process(ring);
6815         return 0;
6816 }
6817
6818 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6819         .name = "gfx_v8_0",
6820         .early_init = gfx_v8_0_early_init,
6821         .late_init = gfx_v8_0_late_init,
6822         .sw_init = gfx_v8_0_sw_init,
6823         .sw_fini = gfx_v8_0_sw_fini,
6824         .hw_init = gfx_v8_0_hw_init,
6825         .hw_fini = gfx_v8_0_hw_fini,
6826         .suspend = gfx_v8_0_suspend,
6827         .resume = gfx_v8_0_resume,
6828         .is_idle = gfx_v8_0_is_idle,
6829         .wait_for_idle = gfx_v8_0_wait_for_idle,
6830         .check_soft_reset = gfx_v8_0_check_soft_reset,
6831         .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6832         .soft_reset = gfx_v8_0_soft_reset,
6833         .post_soft_reset = gfx_v8_0_post_soft_reset,
6834         .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6835         .set_powergating_state = gfx_v8_0_set_powergating_state,
6836         .get_clockgating_state = gfx_v8_0_get_clockgating_state,
6837 };
6838
6839 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6840         .type = AMDGPU_RING_TYPE_GFX,
6841         .align_mask = 0xff,
6842         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6843         .support_64bit_ptrs = false,
6844         .get_rptr = gfx_v8_0_ring_get_rptr,
6845         .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6846         .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6847         .emit_frame_size = /* maximum 215 dwords when 16 IBs are counted in */
6848                 5 +  /* COND_EXEC */
6849                 7 +  /* PIPELINE_SYNC */
6850                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
6851                 8 +  /* FENCE for VM_FLUSH */
6852                 20 + /* GDS switch */
6853                 4 + /* double SWITCH_BUFFER,
6854                        the first COND_EXEC jumps to the place just
6855                        prior to this double SWITCH_BUFFER */
6856                 5 + /* COND_EXEC */
6857                 7 +  /* HDP_flush */
6858                 4 +  /* VGT_flush */
6859                 14 + /* CE_META */
6860                 31 + /* DE_META */
6861                 3 + /* CNTX_CTRL */
6862                 5 + /* HDP_INVL */
6863                 8 + 8 + /* FENCE x2 */
6864                 2, /* SWITCH_BUFFER */
6865         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6866         .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6867         .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6868         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6869         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6870         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6871         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6872         .test_ring = gfx_v8_0_ring_test_ring,
6873         .test_ib = gfx_v8_0_ring_test_ib,
6874         .insert_nop = amdgpu_ring_insert_nop,
6875         .pad_ib = amdgpu_ring_generic_pad_ib,
6876         .emit_switch_buffer = gfx_v8_ring_emit_sb,
6877         .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6878         .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
6879         .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
6880         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6881 };
6882
6883 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6884         .type = AMDGPU_RING_TYPE_COMPUTE,
6885         .align_mask = 0xff,
6886         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6887         .support_64bit_ptrs = false,
6888         .get_rptr = gfx_v8_0_ring_get_rptr,
6889         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6890         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6891         .emit_frame_size =
6892                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6893                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6894                 5 + /* hdp_invalidate */
6895                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6896                 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
6897                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6898         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6899         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6900         .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6901         .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6902         .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6903         .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6904         .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6905         .test_ring = gfx_v8_0_ring_test_ring,
6906         .test_ib = gfx_v8_0_ring_test_ib,
6907         .insert_nop = amdgpu_ring_insert_nop,
6908         .pad_ib = amdgpu_ring_generic_pad_ib,
6909         .set_priority = gfx_v8_0_ring_set_priority_compute,
6910         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6911 };
6912
6913 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6914         .type = AMDGPU_RING_TYPE_KIQ,
6915         .align_mask = 0xff,
6916         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6917         .support_64bit_ptrs = false,
6918         .get_rptr = gfx_v8_0_ring_get_rptr,
6919         .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6920         .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6921         .emit_frame_size =
6922                 20 + /* gfx_v8_0_ring_emit_gds_switch */
6923                 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6924                 5 + /* hdp_invalidate */
6925                 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6926                 17 + /* gfx_v8_0_ring_emit_vm_flush */
6927                 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6928         .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6929         .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6930         .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6931         .test_ring = gfx_v8_0_ring_test_ring,
6932         .test_ib = gfx_v8_0_ring_test_ib,
6933         .insert_nop = amdgpu_ring_insert_nop,
6934         .pad_ib = amdgpu_ring_generic_pad_ib,
6935         .emit_rreg = gfx_v8_0_ring_emit_rreg,
6936         .emit_wreg = gfx_v8_0_ring_emit_wreg,
6937 };
6938
6939 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6940 {
6941         int i;
6942
6943         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6944
6945         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6946                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6947
6948         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6949                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6950 }
6951
6952 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6953         .set = gfx_v8_0_set_eop_interrupt_state,
6954         .process = gfx_v8_0_eop_irq,
6955 };
6956
6957 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6958         .set = gfx_v8_0_set_priv_reg_fault_state,
6959         .process = gfx_v8_0_priv_reg_irq,
6960 };
6961
6962 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6963         .set = gfx_v8_0_set_priv_inst_fault_state,
6964         .process = gfx_v8_0_priv_inst_irq,
6965 };
6966
6967 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
6968         .set = gfx_v8_0_kiq_set_interrupt_state,
6969         .process = gfx_v8_0_kiq_irq,
6970 };
6971
6972 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6973 {
6974         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6975         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6976
6977         adev->gfx.priv_reg_irq.num_types = 1;
6978         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6979
6980         adev->gfx.priv_inst_irq.num_types = 1;
6981         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6982
6983         adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
6984         adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
6985 }
6986
6987 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6988 {
6989         adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6990 }
6991
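/* Read back the GDS size from the hardware and split it into fixed
 * gfx/CS partitions; the split differs for the 64 KiB configuration.
 */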
6992 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6993 {
6994         /* init ASIC GDS info */
6995         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6996         adev->gds.gws.total_size = 64;
6997         adev->gds.oa.total_size = 16;
6998
6999         if (adev->gds.mem.total_size == 64 * 1024) {
7000                 adev->gds.mem.gfx_partition_size = 4096;
7001                 adev->gds.mem.cs_partition_size = 4096;
7002
7003                 adev->gds.gws.gfx_partition_size = 4;
7004                 adev->gds.gws.cs_partition_size = 4;
7005
7006                 adev->gds.oa.gfx_partition_size = 4;
7007                 adev->gds.oa.cs_partition_size = 1;
7008         } else {
7009                 adev->gds.mem.gfx_partition_size = 1024;
7010                 adev->gds.mem.cs_partition_size = 1024;
7011
7012                 adev->gds.gws.gfx_partition_size = 16;
7013                 adev->gds.gws.cs_partition_size = 16;
7014
7015                 adev->gds.oa.gfx_partition_size = 4;
7016                 adev->gds.oa.cs_partition_size = 4;
7017         }
7018 }
7019
7020 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7021                                                  u32 bitmap)
7022 {
7023         u32 data;
7024
7025         if (!bitmap)
7026                 return;
7027
7028         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7029         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7030
7031         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7032 }
7033
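/* A CU is active if it is disabled neither by the hardware fuses
 * (CC_GC_SHADER_ARRAY_CONFIG) nor by the user override
 * (GC_USER_SHADER_ARRAY_CONFIG), limited to max_cu_per_sh bits.
 */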
7034 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7035 {
7036         u32 data, mask;
7037
7038         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7039                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7040
7041         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7042
7043         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7044 }
7045
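/* Walk all shader engines/arrays, record the per-SH active CU bitmaps
 * and build the always-on (AO) CU mask; on APUs at most two CUs per SH
 * are marked always-on, otherwise all active CUs are.
 */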
7046 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7047 {
7048         int i, j, k, counter, active_cu_number = 0;
7049         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7050         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7051         unsigned disable_masks[4 * 2];
7052         u32 ao_cu_num;
7053
7054         memset(cu_info, 0, sizeof(*cu_info));
7055
7056         if (adev->flags & AMD_IS_APU)
7057                 ao_cu_num = 2;
7058         else
7059                 ao_cu_num = adev->gfx.config.max_cu_per_sh;
7060
7061         amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7062
7063         mutex_lock(&adev->grbm_idx_mutex);
7064         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7065                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7066                         mask = 1;
7067                         ao_bitmap = 0;
7068                         counter = 0;
7069                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7070                         if (i < 4 && j < 2)
7071                                 gfx_v8_0_set_user_cu_inactive_bitmap(
7072                                         adev, disable_masks[i * 2 + j]);
7073                         bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7074                         cu_info->bitmap[i][j] = bitmap;
7075
7076                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7077                                 if (bitmap & mask) {
7078                                         if (counter < ao_cu_num)
7079                                                 ao_bitmap |= mask;
7080                                         counter++;
7081                                 }
7082                                 mask <<= 1;
7083                         }
7084                         active_cu_number += counter;
7085                         if (i < 2 && j < 2)
7086                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7087                         cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
7088                 }
7089         }
7090         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7091         mutex_unlock(&adev->grbm_idx_mutex);
7092
7093         cu_info->number = active_cu_number;
7094         cu_info->ao_cu_mask = ao_cu_mask;
7095         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7096         cu_info->max_waves_per_simd = 10;
7097         cu_info->max_scratch_slots_per_cu = 32;
7098         cu_info->wave_front_size = 64;
7099         cu_info->lds_size = 64;
7100 }
7101
7102 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7103 {
7104         .type = AMD_IP_BLOCK_TYPE_GFX,
7105         .major = 8,
7106         .minor = 0,
7107         .rev = 0,
7108         .funcs = &gfx_v8_0_ip_funcs,
7109 };
7110
7111 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7112 {
7113         .type = AMD_IP_BLOCK_TYPE_GFX,
7114         .major = 8,
7115         .minor = 1,
7116         .rev = 0,
7117         .funcs = &gfx_v8_0_ip_funcs,
7118 };
7119
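/* Write the CE metadata payload into the per-ring CSA via WRITE_DATA;
 * the payload layout, and therefore the packet size, depends on whether
 * chained IBs are supported under virtualization.
 */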
7120 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7121 {
7122         uint64_t ce_payload_addr;
7123         int cnt_ce;
7124         union {
7125                 struct vi_ce_ib_state regular;
7126                 struct vi_ce_ib_state_chained_ib chained;
7127         } ce_payload = {};
7128
7129         if (ring->adev->virt.chained_ib_support) {
7130                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7131                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7132                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7133         } else {
7134                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7135                         offsetof(struct vi_gfx_meta_data, ce_payload);
7136                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7137         }
7138
7139         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7140         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7141                                 WRITE_DATA_DST_SEL(8) |
7142                                 WR_CONFIRM) |
7143                                 WRITE_DATA_CACHE_POLICY(0));
7144         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7145         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7146         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7147 }
7148
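/* Same as the CE variant above, but for the DE metadata; the payload
 * also records the GDS backup area placed 4 KiB into the CSA.
 */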
7149 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7150 {
7151         uint64_t de_payload_addr, gds_addr, csa_addr;
7152         int cnt_de;
7153         union {
7154                 struct vi_de_ib_state regular;
7155                 struct vi_de_ib_state_chained_ib chained;
7156         } de_payload = {};
7157
7158         csa_addr = amdgpu_csa_vaddr(ring->adev);
7159         gds_addr = csa_addr + 4096;
7160         if (ring->adev->virt.chained_ib_support) {
7161                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7162                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7163                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7164                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7165         } else {
7166                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7167                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7168                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7169                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7170         }
7171
7172         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7173         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7174                                 WRITE_DATA_DST_SEL(8) |
7175                                 WR_CONFIRM) |
7176                                 WRITE_DATA_CACHE_POLICY(0));
7177         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7178         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7179         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7180 }