Merge tag 'drm-misc-next-fixes-2018-10-31' of git://anongit.freedesktop.org/drm/drm...
[sfrench/cifs-2.6.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/kernel.h>
24 #include <linux/firmware.h>
25 #include <drm/drmP.h>
26 #include "amdgpu.h"
27 #include "amdgpu_gfx.h"
28 #include "vi.h"
29 #include "vi_structs.h"
30 #include "vid.h"
31 #include "amdgpu_ucode.h"
32 #include "amdgpu_atombios.h"
33 #include "atombios_i2c.h"
34 #include "clearstate_vi.h"
35
36 #include "gmc/gmc_8_2_d.h"
37 #include "gmc/gmc_8_2_sh_mask.h"
38
39 #include "oss/oss_3_0_d.h"
40 #include "oss/oss_3_0_sh_mask.h"
41
42 #include "bif/bif_5_0_d.h"
43 #include "bif/bif_5_0_sh_mask.h"
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #include "smu/smu_7_1_3_d.h"
53
54 #include "ivsrcid/ivsrcid_vislands30.h"
55
56 #define GFX8_NUM_GFX_RINGS     1
57 #define GFX8_MEC_HPD_SIZE 2048
58
59 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
60 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
61 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
62 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
63
64 #define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
65 #define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
66 #define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
67 #define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
68 #define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
69 #define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
70 #define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
71 #define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
72 #define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
73
74 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
75 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
76 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
77 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
78 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
79 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L
80
81 /* BPM SERDES CMD */
82 #define SET_BPM_SERDES_CMD    1
83 #define CLE_BPM_SERDES_CMD    0
84
85 /* BPM Register Address*/
/*
 * Virtual BPM register indices, used together with SET_BPM_SERDES_CMD /
 * CLE_BPM_SERDES_CMD defined above.  These are command-interface
 * selectors, not MMIO offsets; the consumer is not visible in this
 * chunk (NOTE(review): presumably written via the RLC serdes path —
 * confirm at the use site).
 */
86 enum {
87         BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
88         BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
89         BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
90         BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
91         BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
92         BPM_REG_FGCG_MAX
93 };
94
95 #define RLC_FormatDirectRegListLength        14
96
97 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
101 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
102 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
103
104 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
107 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
108 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
109
110 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
114 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
115 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
116
117 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
120 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
121 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
122
123 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
127 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
128 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
129
130 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
131 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
133 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
134 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
141
142 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
143 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
144 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
145 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
146 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
147 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
148 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
149 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
150 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
151 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
152 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
153
154 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
155 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
156 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
157 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
158 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
159 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
160 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
161 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
162 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
163 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
164 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
165
166 MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
167 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
168 MODULE_FIRMWARE("amdgpu/vegam_me.bin");
169 MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
170 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
171 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
172
/*
 * Per-VMID GDS register offsets, indexed by VMID (0..15).  Each row
 * holds the {GDS base, GDS size, GWS, OA} register offsets for that
 * VMID, matching the field order of struct amdgpu_gds_reg_offset.
 */
173 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
174 {
175         {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
176         {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
177         {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
178         {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
179         {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
180         {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
181         {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
182         {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
183         {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
184         {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
185         {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
186         {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
187         {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
188         {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
189         {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
190         {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
191 };
192
/*
 * Tonga A11 golden register settings, stored as flat triplets:
 * { register offset, AND mask, OR value }.  NOTE(review): presumably
 * applied by a read-modify-write register-sequence helper — the
 * consumer is not visible in this chunk; confirm at the call site.
 */
193 static const u32 golden_settings_tonga_a11[] =
194 {
195         mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
196         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
197         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
198         mmGB_GPU_ID, 0x0000000f, 0x00000000,
199         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
200         mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
201         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
202         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
203         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
204         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
205         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
206         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
207         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
208         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
209         mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
210         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
211 };
212
/*
 * Tonga common golden settings: { register, AND mask, OR value }
 * triplets (masks of 0xffffffff mean a full register write).
 */
213 static const u32 tonga_golden_common_all[] =
214 {
215         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
216         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
217         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
218         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
219         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
220         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
221         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
222         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
223 };
224
/*
 * Tonga MGCG/CGCG clock-gating init sequence: { register, AND mask,
 * OR value } triplets.  Note mmGRBM_GFX_INDEX is written twice
 * (before the CGTT_* block and before the per-CU CGTS_* block) to
 * broadcast to all SE/SH instances (0xe0000000).
 */
225 static const u32 tonga_mgcg_cgcg_init[] =
226 {
227         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
228         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
229         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
230         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
231         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
232         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
233         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
234         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
235         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
236         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
237         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
238         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
239         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
240         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
241         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
242         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
243         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
244         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
245         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
246         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
247         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
248         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
249         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
250         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
251         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
252         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
253         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
254         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
255         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
256         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
257         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
258         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
259         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
260         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
261         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
262         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
263         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
264         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
265         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
266         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
267         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
268         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
269         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
270         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
271         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
272         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
273         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
274         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
275         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
276         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
277         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
278         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
279         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
280         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
281         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
282         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
283         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
284         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
285         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
286         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
287         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
288         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
289         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
290         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
291         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
292         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
293         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
294         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
295         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
296         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
297         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
298         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
299         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
300         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
301         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
302 };
303
/*
 * VegaM A11 golden register settings: { register, AND mask, OR value }
 * triplets (read-modify-write; only the masked bits are replaced).
 */
304 static const u32 golden_settings_vegam_a11[] =
305 {
306         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
307         mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
308         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
309         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
310         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
311         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
312         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
313         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
314         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
315         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
316         mmSQ_CONFIG, 0x07f80000, 0x01180000,
317         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
318         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
319         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
320         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
321         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
322         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
323 };
324
/*
 * VegaM common golden settings: { register, AND mask, OR value }
 * triplets (full-register writes; all masks are 0xffffffff).
 */
325 static const u32 vegam_golden_common_all[] =
326 {
327         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
328         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
329         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
330         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
331         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
332         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
333 };
334
/*
 * Polaris11 A11 golden register settings: { register, AND mask,
 * OR value } triplets (read-modify-write).
 */
335 static const u32 golden_settings_polaris11_a11[] =
336 {
337         mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
338         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
339         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
340         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
341         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
342         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
343         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
344         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
345         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
346         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
347         mmSQ_CONFIG, 0x07f80000, 0x01180000,
348         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
349         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
350         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
351         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
352         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
353         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
354 };
355
/*
 * Polaris11 common golden settings: { register, AND mask, OR value }
 * triplets.  GB_ADDR_CONFIG matches POLARIS11_GB_ADDR_CONFIG_GOLDEN
 * (0x22011002) defined near the top of the file.
 */
356 static const u32 polaris11_golden_common_all[] =
357 {
358         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
359         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
360         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
361         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
362         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
363         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
364 };
365
/*
 * Polaris10 A11 golden register settings: { register, AND mask,
 * OR value } triplets (read-modify-write).
 */
366 static const u32 golden_settings_polaris10_a11[] =
367 {
368         mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
369         mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
370         mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
371         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
372         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
373         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
374         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
375         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
376         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
377         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
378         mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
379         mmSQ_CONFIG, 0x07f80000, 0x07180000,
380         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
381         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
382         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
383         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
384         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
385 };
386
/*
 * Polaris10 common golden settings: { register, AND mask, OR value }
 * triplets (full-register writes).
 */
387 static const u32 polaris10_golden_common_all[] =
388 {
389         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
390         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
391         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
392         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
393         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
394         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
395         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
396         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
397 };
398
/*
 * Fiji common golden settings: { register, AND mask, OR value }
 * triplets.  mmGRBM_GFX_INDEX appears twice, re-broadcasting to all
 * SE/SH instances (0xe0000000) before the SPI_CONFIG_CNTL_1 write.
 */
399 static const u32 fiji_golden_common_all[] =
400 {
401         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
402         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
403         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
404         mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
405         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
406         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
407         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
408         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
409         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
410         mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
411 };
412
/*
 * Fiji A10 golden register settings: { register, AND mask, OR value }
 * triplets (read-modify-write).
 */
413 static const u32 golden_settings_fiji_a10[] =
414 {
415         mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
416         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
417         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
418         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
419         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
420         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
421         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
422         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
423         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
424         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
425         mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
426 };
427
/*
 * Fiji MGCG/CGCG clock-gating init sequence: { register, AND mask,
 * OR value } triplets.  Same structure as tonga_mgcg_cgcg_init but
 * without the per-CU CGTS_CUn_* entries; mmGRBM_GFX_INDEX is set to
 * broadcast mode (0xe0000000) before each group of writes.
 */
428 static const u32 fiji_mgcg_cgcg_init[] =
429 {
430         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
431         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
432         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
433         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
434         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
435         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
436         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
437         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
438         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
439         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
440         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
441         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
442         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
443         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
444         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
445         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
446         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
447         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
448         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
449         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
450         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
451         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
452         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
453         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
454         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
455         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
456         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
457         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
458         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
459         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
460         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
461         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
462         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
463         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
464         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
465 };
466
/*
 * Iceland (Topaz) A11 golden register settings: { register, AND mask,
 * OR value } triplets (read-modify-write).
 */
467 static const u32 golden_settings_iceland_a11[] =
468 {
469         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
470         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
471         mmDB_DEBUG3, 0xc0000000, 0xc0000000,
472         mmGB_GPU_ID, 0x0000000f, 0x00000000,
473         mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
474         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
475         mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
476         mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
477         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
478         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
479         mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
480         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
481         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
482         mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
483         mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
484         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
485 };
486
/*
 * Iceland (Topaz) common golden settings: { register, AND mask,
 * OR value } triplets.  GB_ADDR_CONFIG matches
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN (0x22010001) defined above.
 */
487 static const u32 iceland_golden_common_all[] =
488 {
489         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
490         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
491         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
492         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
493         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
494         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
495         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
496         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
497 };
498
/*
 * Iceland (Topaz) MGCG/CGCG clock-gating init sequence: { register,
 * AND mask, OR value } triplets.  Covers CU0-CU5 only (fewer CUs than
 * Tonga's CU0-CU7 table); mmGRBM_GFX_INDEX is set to broadcast mode
 * (0xe0000000) before each group of writes.
 */
499 static const u32 iceland_mgcg_cgcg_init[] =
500 {
501         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
502         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
503         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
504         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
505         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
506         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
507         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
508         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
509         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
510         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
511         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
512         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
513         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
514         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
515         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
516         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
517         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
518         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
519         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
520         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
521         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
522         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
523         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
524         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
525         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
526         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
527         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
528         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
529         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
530         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
531         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
532         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
533         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
534         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
535         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
536         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
537         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
538         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
539         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
540         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
541         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
542         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
543         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
544         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
545         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
546         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
547         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
548         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
549         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
550         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
551         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
552         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
553         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
554         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
555         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
556         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
557         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
558         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
559         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
560         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
561         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
562         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
563         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
564         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
565 };
566
/*
 * Carrizo A11 golden register settings: { register, AND mask,
 * OR value } triplets (read-modify-write).
 */
567 static const u32 cz_golden_settings_a11[] =
568 {
569         mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
570         mmDB_DEBUG2, 0xf00fffff, 0x00000400,
571         mmGB_GPU_ID, 0x0000000f, 0x00000000,
572         mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
573         mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
574         mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
575         mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
576         mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
577         mmTCC_CTRL, 0x00100000, 0xf31fff7f,
578         mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
579         mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
580         mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
581 };
582
/*
 * Carrizo common golden settings: { register, AND mask, OR value }
 * triplets.  GB_ADDR_CONFIG matches CARRIZO_GB_ADDR_CONFIG_GOLDEN
 * (0x22010001) defined above.
 */
583 static const u32 cz_golden_common_all[] =
584 {
585         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
586         mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
587         mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
588         mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
589         mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
590         mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
591         mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
592         mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
593 };
594
595 static const u32 cz_mgcg_cgcg_init[] =
596 {
597         mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
598         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
599         mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
600         mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
601         mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
602         mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
603         mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
604         mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
605         mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
606         mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
607         mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
608         mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
609         mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
610         mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
611         mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
612         mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
613         mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
614         mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
615         mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
616         mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
617         mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
618         mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
619         mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
620         mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
621         mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
622         mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
623         mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
624         mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
625         mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
626         mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
627         mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
628         mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
629         mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
630         mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
631         mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
632         mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
633         mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
634         mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
635         mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
636         mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
637         mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
638         mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
639         mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
640         mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
641         mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
642         mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
643         mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
644         mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
645         mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
646         mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
647         mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
648         mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
649         mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
650         mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
651         mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
652         mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
653         mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
654         mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
655         mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
656         mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
657         mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
658         mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
659         mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
660         mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
661         mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
662         mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
663         mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
664         mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
665         mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
666         mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
667         mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
668         mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
669         mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
670         mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
671         mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
672 };
673
/*
 * Stoney golden register settings: flat triplets of
 * {register offset, and_mask, value} consumed by
 * amdgpu_device_program_register_sequence() in
 * gfx_v8_0_init_golden_registers().
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
687
/*
 * Stoney common golden registers (raster config, GB addressing, SPI CU
 * reservations): {register offset, and_mask, value} triplets applied by
 * amdgpu_device_program_register_sequence().
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};
699
/*
 * Stoney medium-grain/coarse-grain clockgating init values:
 * {register offset, and_mask, value} triplets applied by
 * amdgpu_device_program_register_sequence().
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
708
709
/*
 * Human-readable descriptions of the SQ EDC error-info SOURCE field,
 * presumably indexed by the raw field value (0 = no error) — confirm
 * against the SQ interrupt handler that prints these.
 */
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};
719
720 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
721 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
722 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
723 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
724 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
725 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
726 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
727 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
728
/*
 * gfx_v8_0_init_golden_registers - program per-ASIC "golden" register values
 *
 * Applies the validated clockgating-init, golden-settings and common
 * register tables for the detected VI variant via
 * amdgpu_device_program_register_sequence().  Unknown ASIC types are
 * silently left untouched.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* Board-specific quirk: certain revision 0xc7 Polaris10
		 * boards (matched by PCI subsystem vendor/device) get two
		 * extra writes to the I2C device at address 0x96 —
		 * presumably a board-level VRM/fan fixup; confirm against
		 * vendor errata before touching. */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
825
826 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
827 {
828         adev->gfx.scratch.num_reg = 8;
829         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
830         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
831 }
832
/*
 * gfx_v8_0_ring_test_ring - basic sanity test of a GFX/compute ring
 *
 * Allocates a scratch register, seeds it with a sentinel, submits a
 * 3-dword SET_UCONFIG_REG packet on @ring that writes 0xDEADBEEF to it,
 * then polls (up to adev->usec_timeout microseconds) for the value to
 * land — proving the CP is fetching and executing from the ring.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed with a sentinel so we can tell when the CP write lands */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* busy-wait for the CP to perform the register write */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
876
/*
 * gfx_v8_0_ring_test_ib - sanity test of indirect buffer execution
 *
 * Allocates a writeback slot seeded with a sentinel, schedules a small IB
 * containing a WRITE_DATA packet that stores 0xDEADBEEF to that slot, and
 * waits (up to @timeout jiffies) on the resulting fence.  Success means
 * the CP can fetch and execute IBs and write back to GPU-visible memory.
 *
 * Returns 0 on success, negative error code on failure (-ETIMEDOUT if
 * the fence never signals).
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
		return r;
	}

	/* writeback slots are 32-bit, hence the *4 byte offset */
	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* WRITE_DATA: store 0xDEADBEEF to the writeback slot, confirmed */
	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("ib test on ring %d failed\n", ring->idx);
		r = -EINVAL;
	}

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}
939
940
941 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
942 {
943         release_firmware(adev->gfx.pfp_fw);
944         adev->gfx.pfp_fw = NULL;
945         release_firmware(adev->gfx.me_fw);
946         adev->gfx.me_fw = NULL;
947         release_firmware(adev->gfx.ce_fw);
948         adev->gfx.ce_fw = NULL;
949         release_firmware(adev->gfx.rlc_fw);
950         adev->gfx.rlc_fw = NULL;
951         release_firmware(adev->gfx.mec_fw);
952         adev->gfx.mec_fw = NULL;
953         if ((adev->asic_type != CHIP_STONEY) &&
954             (adev->asic_type != CHIP_TOPAZ))
955                 release_firmware(adev->gfx.mec2_fw);
956         adev->gfx.mec2_fw = NULL;
957
958         kfree(adev->gfx.rlc.register_list_format);
959 }
960
961 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
962 {
963         const char *chip_name;
964         char fw_name[30];
965         int err;
966         struct amdgpu_firmware_info *info = NULL;
967         const struct common_firmware_header *header = NULL;
968         const struct gfx_firmware_header_v1_0 *cp_hdr;
969         const struct rlc_firmware_header_v2_0 *rlc_hdr;
970         unsigned int *tmp = NULL, i;
971
972         DRM_DEBUG("\n");
973
974         switch (adev->asic_type) {
975         case CHIP_TOPAZ:
976                 chip_name = "topaz";
977                 break;
978         case CHIP_TONGA:
979                 chip_name = "tonga";
980                 break;
981         case CHIP_CARRIZO:
982                 chip_name = "carrizo";
983                 break;
984         case CHIP_FIJI:
985                 chip_name = "fiji";
986                 break;
987         case CHIP_STONEY:
988                 chip_name = "stoney";
989                 break;
990         case CHIP_POLARIS10:
991                 chip_name = "polaris10";
992                 break;
993         case CHIP_POLARIS11:
994                 chip_name = "polaris11";
995                 break;
996         case CHIP_POLARIS12:
997                 chip_name = "polaris12";
998                 break;
999         case CHIP_VEGAM:
1000                 chip_name = "vegam";
1001                 break;
1002         default:
1003                 BUG();
1004         }
1005
1006         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1007                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
1008                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1009                 if (err == -ENOENT) {
1010                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1011                         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1012                 }
1013         } else {
1014                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1015                 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1016         }
1017         if (err)
1018                 goto out;
1019         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1020         if (err)
1021                 goto out;
1022         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1023         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1024         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1025
1026         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1027                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
1028                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1029                 if (err == -ENOENT) {
1030                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1031                         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1032                 }
1033         } else {
1034                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1035                 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1036         }
1037         if (err)
1038                 goto out;
1039         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1040         if (err)
1041                 goto out;
1042         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1043         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1044
1045         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1046
1047         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1048                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
1049                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1050                 if (err == -ENOENT) {
1051                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1052                         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1053                 }
1054         } else {
1055                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1056                 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1057         }
1058         if (err)
1059                 goto out;
1060         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1061         if (err)
1062                 goto out;
1063         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1064         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1065         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1066
1067         /*
1068          * Support for MCBP/Virtualization in combination with chained IBs is
1069          * formal released on feature version #46
1070          */
1071         if (adev->gfx.ce_feature_version >= 46 &&
1072             adev->gfx.pfp_feature_version >= 46) {
1073                 adev->virt.chained_ib_support = true;
1074                 DRM_INFO("Chained IB support enabled!\n");
1075         } else
1076                 adev->virt.chained_ib_support = false;
1077
1078         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1079         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1080         if (err)
1081                 goto out;
1082         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1083         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1084         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1085         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1086
1087         adev->gfx.rlc.save_and_restore_offset =
1088                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1089         adev->gfx.rlc.clear_state_descriptor_offset =
1090                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1091         adev->gfx.rlc.avail_scratch_ram_locations =
1092                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1093         adev->gfx.rlc.reg_restore_list_size =
1094                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1095         adev->gfx.rlc.reg_list_format_start =
1096                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1097         adev->gfx.rlc.reg_list_format_separate_start =
1098                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1099         adev->gfx.rlc.starting_offsets_start =
1100                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1101         adev->gfx.rlc.reg_list_format_size_bytes =
1102                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1103         adev->gfx.rlc.reg_list_size_bytes =
1104                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1105
1106         adev->gfx.rlc.register_list_format =
1107                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1108                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1109
1110         if (!adev->gfx.rlc.register_list_format) {
1111                 err = -ENOMEM;
1112                 goto out;
1113         }
1114
1115         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1116                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1117         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1118                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1119
1120         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1121
1122         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1123                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1124         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1125                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1126
1127         if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1128                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
1129                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1130                 if (err == -ENOENT) {
1131                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1132                         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1133                 }
1134         } else {
1135                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1136                 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1137         }
1138         if (err)
1139                 goto out;
1140         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1141         if (err)
1142                 goto out;
1143         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1144         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1145         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1146
1147         if ((adev->asic_type != CHIP_STONEY) &&
1148             (adev->asic_type != CHIP_TOPAZ)) {
1149                 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
1150                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
1151                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1152                         if (err == -ENOENT) {
1153                                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1154                                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1155                         }
1156                 } else {
1157                         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1158                         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1159                 }
1160                 if (!err) {
1161                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1162                         if (err)
1163                                 goto out;
1164                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1165                                 adev->gfx.mec2_fw->data;
1166                         adev->gfx.mec2_fw_version =
1167                                 le32_to_cpu(cp_hdr->header.ucode_version);
1168                         adev->gfx.mec2_feature_version =
1169                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1170                 } else {
1171                         err = 0;
1172                         adev->gfx.mec2_fw = NULL;
1173                 }
1174         }
1175
1176         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1177         info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1178         info->fw = adev->gfx.pfp_fw;
1179         header = (const struct common_firmware_header *)info->fw->data;
1180         adev->firmware.fw_size +=
1181                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1182
1183         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1184         info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1185         info->fw = adev->gfx.me_fw;
1186         header = (const struct common_firmware_header *)info->fw->data;
1187         adev->firmware.fw_size +=
1188                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1189
1190         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1191         info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1192         info->fw = adev->gfx.ce_fw;
1193         header = (const struct common_firmware_header *)info->fw->data;
1194         adev->firmware.fw_size +=
1195                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1196
1197         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1198         info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1199         info->fw = adev->gfx.rlc_fw;
1200         header = (const struct common_firmware_header *)info->fw->data;
1201         adev->firmware.fw_size +=
1202                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1203
1204         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1205         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1206         info->fw = adev->gfx.mec_fw;
1207         header = (const struct common_firmware_header *)info->fw->data;
1208         adev->firmware.fw_size +=
1209                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1210
1211         /* we need account JT in */
1212         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1213         adev->firmware.fw_size +=
1214                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1215
1216         if (amdgpu_sriov_vf(adev)) {
1217                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1218                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1219                 info->fw = adev->gfx.mec_fw;
1220                 adev->firmware.fw_size +=
1221                         ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1222         }
1223
1224         if (adev->gfx.mec2_fw) {
1225                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1226                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1227                 info->fw = adev->gfx.mec2_fw;
1228                 header = (const struct common_firmware_header *)info->fw->data;
1229                 adev->firmware.fw_size +=
1230                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1231         }
1232
1233 out:
1234         if (err) {
1235                 dev_err(adev->dev,
1236                         "gfx8: Failed to load firmware \"%s\"\n",
1237                         fw_name);
1238                 release_firmware(adev->gfx.pfp_fw);
1239                 adev->gfx.pfp_fw = NULL;
1240                 release_firmware(adev->gfx.me_fw);
1241                 adev->gfx.me_fw = NULL;
1242                 release_firmware(adev->gfx.ce_fw);
1243                 adev->gfx.ce_fw = NULL;
1244                 release_firmware(adev->gfx.rlc_fw);
1245                 adev->gfx.rlc_fw = NULL;
1246                 release_firmware(adev->gfx.mec_fw);
1247                 adev->gfx.mec_fw = NULL;
1248                 release_firmware(adev->gfx.mec2_fw);
1249                 adev->gfx.mec2_fw = NULL;
1250         }
1251         return err;
1252 }
1253
1254 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1255                                     volatile u32 *buffer)
1256 {
1257         u32 count = 0, i;
1258         const struct cs_section_def *sect = NULL;
1259         const struct cs_extent_def *ext = NULL;
1260
1261         if (adev->gfx.rlc.cs_data == NULL)
1262                 return;
1263         if (buffer == NULL)
1264                 return;
1265
1266         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1267         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1268
1269         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1270         buffer[count++] = cpu_to_le32(0x80000000);
1271         buffer[count++] = cpu_to_le32(0x80000000);
1272
1273         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1274                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1275                         if (sect->id == SECT_CONTEXT) {
1276                                 buffer[count++] =
1277                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1278                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1279                                                 PACKET3_SET_CONTEXT_REG_START);
1280                                 for (i = 0; i < ext->reg_count; i++)
1281                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1282                         } else {
1283                                 return;
1284                         }
1285                 }
1286         }
1287
1288         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1289         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1290                         PACKET3_SET_CONTEXT_REG_START);
1291         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1292         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1293
1294         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1295         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1296
1297         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1298         buffer[count++] = cpu_to_le32(0);
1299 }
1300
1301 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1302 {
1303         const __le32 *fw_data;
1304         volatile u32 *dst_ptr;
1305         int me, i, max_me = 4;
1306         u32 bo_offset = 0;
1307         u32 table_offset, table_size;
1308
1309         if (adev->asic_type == CHIP_CARRIZO)
1310                 max_me = 5;
1311
1312         /* write the cp table buffer */
1313         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1314         for (me = 0; me < max_me; me++) {
1315                 if (me == 0) {
1316                         const struct gfx_firmware_header_v1_0 *hdr =
1317                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1318                         fw_data = (const __le32 *)
1319                                 (adev->gfx.ce_fw->data +
1320                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1321                         table_offset = le32_to_cpu(hdr->jt_offset);
1322                         table_size = le32_to_cpu(hdr->jt_size);
1323                 } else if (me == 1) {
1324                         const struct gfx_firmware_header_v1_0 *hdr =
1325                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1326                         fw_data = (const __le32 *)
1327                                 (adev->gfx.pfp_fw->data +
1328                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1329                         table_offset = le32_to_cpu(hdr->jt_offset);
1330                         table_size = le32_to_cpu(hdr->jt_size);
1331                 } else if (me == 2) {
1332                         const struct gfx_firmware_header_v1_0 *hdr =
1333                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1334                         fw_data = (const __le32 *)
1335                                 (adev->gfx.me_fw->data +
1336                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1337                         table_offset = le32_to_cpu(hdr->jt_offset);
1338                         table_size = le32_to_cpu(hdr->jt_size);
1339                 } else if (me == 3) {
1340                         const struct gfx_firmware_header_v1_0 *hdr =
1341                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1342                         fw_data = (const __le32 *)
1343                                 (adev->gfx.mec_fw->data +
1344                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1345                         table_offset = le32_to_cpu(hdr->jt_offset);
1346                         table_size = le32_to_cpu(hdr->jt_size);
1347                 } else  if (me == 4) {
1348                         const struct gfx_firmware_header_v1_0 *hdr =
1349                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1350                         fw_data = (const __le32 *)
1351                                 (adev->gfx.mec2_fw->data +
1352                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1353                         table_offset = le32_to_cpu(hdr->jt_offset);
1354                         table_size = le32_to_cpu(hdr->jt_size);
1355                 }
1356
1357                 for (i = 0; i < table_size; i ++) {
1358                         dst_ptr[bo_offset + i] =
1359                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1360                 }
1361
1362                 bo_offset += table_size;
1363         }
1364 }
1365
/* Tear down the RLC buffer objects created by gfx_v8_0_rlc_init(). */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	/* clear state block */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	/* CP jump table block (only allocated on CZ/ST) */
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}
1371
1372 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1373 {
1374         volatile u32 *dst_ptr;
1375         u32 dws;
1376         const struct cs_section_def *cs_data;
1377         int r;
1378
1379         adev->gfx.rlc.cs_data = vi_cs_data;
1380
1381         cs_data = adev->gfx.rlc.cs_data;
1382
1383         if (cs_data) {
1384                 /* clear state block */
1385                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1386
1387                 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
1388                                               AMDGPU_GEM_DOMAIN_VRAM,
1389                                               &adev->gfx.rlc.clear_state_obj,
1390                                               &adev->gfx.rlc.clear_state_gpu_addr,
1391                                               (void **)&adev->gfx.rlc.cs_ptr);
1392                 if (r) {
1393                         dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1394                         gfx_v8_0_rlc_fini(adev);
1395                         return r;
1396                 }
1397
1398                 /* set up the cs buffer */
1399                 dst_ptr = adev->gfx.rlc.cs_ptr;
1400                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1401                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1402                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1403         }
1404
1405         if ((adev->asic_type == CHIP_CARRIZO) ||
1406             (adev->asic_type == CHIP_STONEY)) {
1407                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1408                 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
1409                                               PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1410                                               &adev->gfx.rlc.cp_table_obj,
1411                                               &adev->gfx.rlc.cp_table_gpu_addr,
1412                                               (void **)&adev->gfx.rlc.cp_table_ptr);
1413                 if (r) {
1414                         dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1415                         return r;
1416                 }
1417
1418                 cz_init_cp_jump_table(adev);
1419
1420                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1421                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1422         }
1423
1424         return 0;
1425 }
1426
/* Release the MEC HPD EOP buffer object created by gfx_v8_0_mec_init(). */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
1431
1432 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1433 {
1434         int r;
1435         u32 *hpd;
1436         size_t mec_hpd_size;
1437
1438         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1439
1440         /* take ownership of the relevant compute queues */
1441         amdgpu_gfx_compute_queue_acquire(adev);
1442
1443         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1444
1445         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1446                                       AMDGPU_GEM_DOMAIN_GTT,
1447                                       &adev->gfx.mec.hpd_eop_obj,
1448                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1449                                       (void **)&hpd);
1450         if (r) {
1451                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1452                 return r;
1453         }
1454
1455         memset(hpd, 0, mec_hpd_size);
1456
1457         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1458         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1459
1460         return 0;
1461 }
1462
/*
 * Raw GCN machine code for the compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize VGPRs.
 * NOTE(review): the dwords appear to be repeated VGPR-writing
 * instructions finished by a wait/end pair — confirm against the
 * GCN3 ISA reference before editing.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1499
/*
 * Raw GCN machine code for the compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds() to initialize SGPRs
 * (dispatched twice, with sgpr1_init_regs and sgpr2_init_regs).
 * NOTE(review): encodings assumed correct as shipped — confirm
 * against the GCN3 ISA reference before editing.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1524
/*
 * Register/value pairs programmed (two entries at a time) by
 * gfx_v8_0_do_edc_gpr_workarounds() before dispatching the
 * VGPR init shader.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1545
/*
 * Register/value pairs for the first SGPR init dispatch
 * (SE mask 0x0f) in gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1566
/*
 * Register/value pairs for the second SGPR init dispatch
 * (SE mask 0xf0) in gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1587
/*
 * EDC SEC/DED error counter registers that are read back at the end
 * of gfx_v8_0_do_edc_gpr_workarounds() to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1616
/*
 * gfx_v8_0_do_edc_gpr_workarounds - initialize GPRs for EDC (Carrizo only)
 *
 * Builds one indirect buffer containing three compute dispatches — one
 * running the VGPR init shader and two running the SGPR init shader on
 * different shader-engine halves — submits it on the first compute ring,
 * waits for completion, then enables EDC modes and clears the SEC/DED
 * counters by reading them back.
 *
 * Returns 0 (including when skipped), or a negative error code from IB
 * allocation/submission or the fence wait.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* Save GB_EDC_MODE and disable EDC while the init shaders run. */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* Per dispatch: 3 dwords per reg/value pair, + 4 (PGM_LO/HI write)
	 * + 5 (DISPATCH_DIRECT) + 2 (EVENT_WRITE), in bytes.
	 */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* Re-enable EDC with DED_MODE=2 and FED propagation. */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1779
1780 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1781 {
1782         u32 gb_addr_config;
1783         u32 mc_shared_chmap, mc_arb_ramcfg;
1784         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1785         u32 tmp;
1786         int ret;
1787
1788         switch (adev->asic_type) {
1789         case CHIP_TOPAZ:
1790                 adev->gfx.config.max_shader_engines = 1;
1791                 adev->gfx.config.max_tile_pipes = 2;
1792                 adev->gfx.config.max_cu_per_sh = 6;
1793                 adev->gfx.config.max_sh_per_se = 1;
1794                 adev->gfx.config.max_backends_per_se = 2;
1795                 adev->gfx.config.max_texture_channel_caches = 2;
1796                 adev->gfx.config.max_gprs = 256;
1797                 adev->gfx.config.max_gs_threads = 32;
1798                 adev->gfx.config.max_hw_contexts = 8;
1799
1800                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1801                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1802                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1803                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1804                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1805                 break;
1806         case CHIP_FIJI:
1807                 adev->gfx.config.max_shader_engines = 4;
1808                 adev->gfx.config.max_tile_pipes = 16;
1809                 adev->gfx.config.max_cu_per_sh = 16;
1810                 adev->gfx.config.max_sh_per_se = 1;
1811                 adev->gfx.config.max_backends_per_se = 4;
1812                 adev->gfx.config.max_texture_channel_caches = 16;
1813                 adev->gfx.config.max_gprs = 256;
1814                 adev->gfx.config.max_gs_threads = 32;
1815                 adev->gfx.config.max_hw_contexts = 8;
1816
1817                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1818                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1819                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1820                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1821                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1822                 break;
1823         case CHIP_POLARIS11:
1824         case CHIP_POLARIS12:
1825                 ret = amdgpu_atombios_get_gfx_info(adev);
1826                 if (ret)
1827                         return ret;
1828                 adev->gfx.config.max_gprs = 256;
1829                 adev->gfx.config.max_gs_threads = 32;
1830                 adev->gfx.config.max_hw_contexts = 8;
1831
1832                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1833                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1834                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1835                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1836                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1837                 break;
1838         case CHIP_POLARIS10:
1839         case CHIP_VEGAM:
1840                 ret = amdgpu_atombios_get_gfx_info(adev);
1841                 if (ret)
1842                         return ret;
1843                 adev->gfx.config.max_gprs = 256;
1844                 adev->gfx.config.max_gs_threads = 32;
1845                 adev->gfx.config.max_hw_contexts = 8;
1846
1847                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1848                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1849                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1850                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1851                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1852                 break;
1853         case CHIP_TONGA:
1854                 adev->gfx.config.max_shader_engines = 4;
1855                 adev->gfx.config.max_tile_pipes = 8;
1856                 adev->gfx.config.max_cu_per_sh = 8;
1857                 adev->gfx.config.max_sh_per_se = 1;
1858                 adev->gfx.config.max_backends_per_se = 2;
1859                 adev->gfx.config.max_texture_channel_caches = 8;
1860                 adev->gfx.config.max_gprs = 256;
1861                 adev->gfx.config.max_gs_threads = 32;
1862                 adev->gfx.config.max_hw_contexts = 8;
1863
1864                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1865                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1866                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1867                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1868                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1869                 break;
1870         case CHIP_CARRIZO:
1871                 adev->gfx.config.max_shader_engines = 1;
1872                 adev->gfx.config.max_tile_pipes = 2;
1873                 adev->gfx.config.max_sh_per_se = 1;
1874                 adev->gfx.config.max_backends_per_se = 2;
1875                 adev->gfx.config.max_cu_per_sh = 8;
1876                 adev->gfx.config.max_texture_channel_caches = 2;
1877                 adev->gfx.config.max_gprs = 256;
1878                 adev->gfx.config.max_gs_threads = 32;
1879                 adev->gfx.config.max_hw_contexts = 8;
1880
1881                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1882                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1883                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1884                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1885                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1886                 break;
1887         case CHIP_STONEY:
1888                 adev->gfx.config.max_shader_engines = 1;
1889                 adev->gfx.config.max_tile_pipes = 2;
1890                 adev->gfx.config.max_sh_per_se = 1;
1891                 adev->gfx.config.max_backends_per_se = 1;
1892                 adev->gfx.config.max_cu_per_sh = 3;
1893                 adev->gfx.config.max_texture_channel_caches = 2;
1894                 adev->gfx.config.max_gprs = 256;
1895                 adev->gfx.config.max_gs_threads = 16;
1896                 adev->gfx.config.max_hw_contexts = 8;
1897
1898                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1899                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1900                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1901                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1902                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1903                 break;
1904         default:
1905                 adev->gfx.config.max_shader_engines = 2;
1906                 adev->gfx.config.max_tile_pipes = 4;
1907                 adev->gfx.config.max_cu_per_sh = 2;
1908                 adev->gfx.config.max_sh_per_se = 1;
1909                 adev->gfx.config.max_backends_per_se = 2;
1910                 adev->gfx.config.max_texture_channel_caches = 4;
1911                 adev->gfx.config.max_gprs = 256;
1912                 adev->gfx.config.max_gs_threads = 32;
1913                 adev->gfx.config.max_hw_contexts = 8;
1914
1915                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1916                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1917                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1918                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1919                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1920                 break;
1921         }
1922
1923         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1924         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1925         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1926
1927         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1928         adev->gfx.config.mem_max_burst_length_bytes = 256;
1929         if (adev->flags & AMD_IS_APU) {
1930                 /* Get memory bank mapping mode. */
1931                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1932                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1933                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1934
1935                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1936                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1937                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1938
1939                 /* Validate settings in case only one DIMM installed. */
1940                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1941                         dimm00_addr_map = 0;
1942                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1943                         dimm01_addr_map = 0;
1944                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1945                         dimm10_addr_map = 0;
1946                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1947                         dimm11_addr_map = 0;
1948
1949                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1950                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1951                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1952                         adev->gfx.config.mem_row_size_in_kb = 2;
1953                 else
1954                         adev->gfx.config.mem_row_size_in_kb = 1;
1955         } else {
1956                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1957                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1958                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1959                         adev->gfx.config.mem_row_size_in_kb = 4;
1960         }
1961
1962         adev->gfx.config.shader_engine_tile_size = 32;
1963         adev->gfx.config.num_gpus = 1;
1964         adev->gfx.config.multi_gpu_tile_size = 64;
1965
1966         /* fix up row size */
1967         switch (adev->gfx.config.mem_row_size_in_kb) {
1968         case 1:
1969         default:
1970                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1971                 break;
1972         case 2:
1973                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1974                 break;
1975         case 4:
1976                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1977                 break;
1978         }
1979         adev->gfx.config.gb_addr_config = gb_addr_config;
1980
1981         return 0;
1982 }
1983
1984 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1985                                         int mec, int pipe, int queue)
1986 {
1987         int r;
1988         unsigned irq_type;
1989         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1990
1991         ring = &adev->gfx.compute_ring[ring_id];
1992
1993         /* mec0 is me1 */
1994         ring->me = mec + 1;
1995         ring->pipe = pipe;
1996         ring->queue = queue;
1997
1998         ring->ring_obj = NULL;
1999         ring->use_doorbell = true;
2000         ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
2001         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2002                                 + (ring_id * GFX8_MEC_HPD_SIZE);
2003         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2004
2005         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2006                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2007                 + ring->pipe;
2008
2009         /* type-2 packets are deprecated on MEC, use type-3 instead */
2010         r = amdgpu_ring_init(adev, ring, 1024,
2011                         &adev->gfx.eop_irq, irq_type);
2012         if (r)
2013                 return r;
2014
2015
2016         return 0;
2017 }
2018
2019 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
2020
/*
 * gfx_v8_0_sw_init - software-side init for the GFX v8 IP block
 * @handle: opaque pointer, actually a struct amdgpu_device *
 *
 * Registers the CP/SQ interrupt sources, loads GFX microcode, allocates
 * RLC and MEC buffer objects, initializes the gfx and compute rings,
 * and sets up the KIQ and per-queue MQDs.  Any failure is returned
 * immediately; teardown of already-initialized state is left to the
 * caller / gfx_v8_0_sw_fini.
 *
 * Returns 0 on success, negative error code on failure.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* number of micro-engine compute units varies per ASIC */
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_CARRIZO:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		adev->gfx.mec.num_mec = 2;
		break;
	case CHIP_TOPAZ:
	case CHIP_STONEY:
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	/* Add CP EDC/ECC irq  */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;

	/* SQ interrupts. */
	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
			      &adev->gfx.sq_irq);
	if (r) {
		DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r);
		return r;
	}

	/* SQ interrupt handling is deferred to a workqueue item */
	INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* MEC BOs must exist before the compute rings below are created */
	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}


	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				/* skip queues not assigned to the kernel driver */
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v8_0_compute_ring_init(adev,
								ring_id,
								i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2164
/*
 * gfx_v8_0_sw_fini - software-side teardown for the GFX v8 IP block
 * @handle: opaque pointer, actually a struct amdgpu_device *
 *
 * Releases everything gfx_v8_0_sw_init() allocated: GDS buffer objects,
 * gfx/compute rings, MQDs, the KIQ, MEC and RLC state, and the loaded
 * microcode.  Teardown runs in roughly the reverse order of init.
 *
 * Returns 0 (unconditionally).
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	/* rings must go before the MQD/KIQ/MEC state they reference */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_compute_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
				&adev->gfx.rlc.clear_state_gpu_addr,
				(void **)&adev->gfx.rlc.cs_ptr);
	/* cp_table BO is only allocated on these APUs; free it there */
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				&adev->gfx.rlc.cp_table_gpu_addr,
				(void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v8_0_free_microcode(adev);

	return 0;
}
2198
2199 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2200 {
2201         uint32_t *modearray, *mod2array;
2202         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2203         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2204         u32 reg_offset;
2205
2206         modearray = adev->gfx.config.tile_mode_array;
2207         mod2array = adev->gfx.config.macrotile_mode_array;
2208
2209         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2210                 modearray[reg_offset] = 0;
2211
2212         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2213                 mod2array[reg_offset] = 0;
2214
2215         switch (adev->asic_type) {
2216         case CHIP_TOPAZ:
2217                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2218                                 PIPE_CONFIG(ADDR_SURF_P2) |
2219                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2220                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2221                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2222                                 PIPE_CONFIG(ADDR_SURF_P2) |
2223                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2224                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2225                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2226                                 PIPE_CONFIG(ADDR_SURF_P2) |
2227                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2228                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2229                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2230                                 PIPE_CONFIG(ADDR_SURF_P2) |
2231                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2232                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2233                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2234                                 PIPE_CONFIG(ADDR_SURF_P2) |
2235                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2236                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2237                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2238                                 PIPE_CONFIG(ADDR_SURF_P2) |
2239                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2240                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2241                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2242                                 PIPE_CONFIG(ADDR_SURF_P2) |
2243                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2244                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2245                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2246                                 PIPE_CONFIG(ADDR_SURF_P2));
2247                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2248                                 PIPE_CONFIG(ADDR_SURF_P2) |
2249                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2250                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2251                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2252                                  PIPE_CONFIG(ADDR_SURF_P2) |
2253                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2254                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2255                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2256                                  PIPE_CONFIG(ADDR_SURF_P2) |
2257                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2258                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2259                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2260                                  PIPE_CONFIG(ADDR_SURF_P2) |
2261                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2262                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2263                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2264                                  PIPE_CONFIG(ADDR_SURF_P2) |
2265                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2266                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2267                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2268                                  PIPE_CONFIG(ADDR_SURF_P2) |
2269                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2270                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2271                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2272                                  PIPE_CONFIG(ADDR_SURF_P2) |
2273                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2274                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2275                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2276                                  PIPE_CONFIG(ADDR_SURF_P2) |
2277                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2278                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2279                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2280                                  PIPE_CONFIG(ADDR_SURF_P2) |
2281                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2282                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2283                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2284                                  PIPE_CONFIG(ADDR_SURF_P2) |
2285                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2286                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2287                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2288                                  PIPE_CONFIG(ADDR_SURF_P2) |
2289                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2290                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2291                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2292                                  PIPE_CONFIG(ADDR_SURF_P2) |
2293                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2294                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2295                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2296                                  PIPE_CONFIG(ADDR_SURF_P2) |
2297                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2298                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2299                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2300                                  PIPE_CONFIG(ADDR_SURF_P2) |
2301                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2302                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2303                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2304                                  PIPE_CONFIG(ADDR_SURF_P2) |
2305                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2306                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2307                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2308                                  PIPE_CONFIG(ADDR_SURF_P2) |
2309                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2310                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2311                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2312                                  PIPE_CONFIG(ADDR_SURF_P2) |
2313                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2314                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2315                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2316                                  PIPE_CONFIG(ADDR_SURF_P2) |
2317                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2318                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2319
2320                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2321                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2322                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2323                                 NUM_BANKS(ADDR_SURF_8_BANK));
2324                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2325                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2326                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2327                                 NUM_BANKS(ADDR_SURF_8_BANK));
2328                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2329                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2330                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2331                                 NUM_BANKS(ADDR_SURF_8_BANK));
2332                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2333                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2334                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2335                                 NUM_BANKS(ADDR_SURF_8_BANK));
2336                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2337                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2338                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2339                                 NUM_BANKS(ADDR_SURF_8_BANK));
2340                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2341                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2342                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2343                                 NUM_BANKS(ADDR_SURF_8_BANK));
2344                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2345                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2346                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2347                                 NUM_BANKS(ADDR_SURF_8_BANK));
2348                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2349                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2350                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2351                                 NUM_BANKS(ADDR_SURF_16_BANK));
2352                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2353                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2354                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2355                                 NUM_BANKS(ADDR_SURF_16_BANK));
2356                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2357                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2358                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2359                                  NUM_BANKS(ADDR_SURF_16_BANK));
2360                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2361                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2362                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2363                                  NUM_BANKS(ADDR_SURF_16_BANK));
2364                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2365                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2366                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2367                                  NUM_BANKS(ADDR_SURF_16_BANK));
2368                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2369                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2370                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2371                                  NUM_BANKS(ADDR_SURF_16_BANK));
2372                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2373                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2374                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2375                                  NUM_BANKS(ADDR_SURF_8_BANK));
2376
2377                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2378                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2379                             reg_offset != 23)
2380                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2381
2382                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2383                         if (reg_offset != 7)
2384                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2385
2386                 break;
2387         case CHIP_FIJI:
2388         case CHIP_VEGAM:
2389                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2390                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2391                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2392                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2393                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2396                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2397                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2398                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2400                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2401                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2402                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2404                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2405                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2406                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2408                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2409                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2410                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2412                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2413                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2414                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2415                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2416                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2417                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2418                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2419                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2420                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2421                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2422                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2423                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2424                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2425                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2426                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2428                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2429                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2430                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2432                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2434                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2435                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2436                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2437                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2438                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2439                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2440                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2441                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2442                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2443                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2444                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2445                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2446                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2447                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2448                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2449                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2450                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2451                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2452                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2453                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2454                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2455                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2456                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2457                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2458                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2459                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2460                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2461                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2462                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2463                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2464                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2465                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2466                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2467                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2468                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2469                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2470                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2471                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2472                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2473                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2474                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2475                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2476                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2477                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2478                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2479                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2480                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2481                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2482                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2483                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2484                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2485                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2486                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2487                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2488                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2489                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2490                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2491                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2492                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2493                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2494                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2495                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2496                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2497                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2498                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2499                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2500                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2501                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2502                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2503                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2504                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2505                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2506                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2507                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2508                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2509                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2510                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2511
2512                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2514                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2515                                 NUM_BANKS(ADDR_SURF_8_BANK));
2516                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2517                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2518                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2519                                 NUM_BANKS(ADDR_SURF_8_BANK));
2520                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2522                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2523                                 NUM_BANKS(ADDR_SURF_8_BANK));
2524                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2525                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2526                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2527                                 NUM_BANKS(ADDR_SURF_8_BANK));
2528                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2530                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2531                                 NUM_BANKS(ADDR_SURF_8_BANK));
2532                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2533                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2534                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2535                                 NUM_BANKS(ADDR_SURF_8_BANK));
2536                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2537                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2538                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2539                                 NUM_BANKS(ADDR_SURF_8_BANK));
2540                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2541                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2542                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2543                                 NUM_BANKS(ADDR_SURF_8_BANK));
2544                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2545                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2546                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2547                                 NUM_BANKS(ADDR_SURF_8_BANK));
2548                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2550                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2551                                  NUM_BANKS(ADDR_SURF_8_BANK));
2552                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2553                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2554                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2555                                  NUM_BANKS(ADDR_SURF_8_BANK));
2556                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2557                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2558                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2559                                  NUM_BANKS(ADDR_SURF_8_BANK));
2560                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2561                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2562                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2563                                  NUM_BANKS(ADDR_SURF_8_BANK));
2564                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2565                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2566                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2567                                  NUM_BANKS(ADDR_SURF_4_BANK));
2568
2569                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2570                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2571
2572                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2573                         if (reg_offset != 7)
2574                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2575
2576                 break;
2577         case CHIP_TONGA:
2578                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2579                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2580                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2581                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2582                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2583                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2585                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2586                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2587                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2588                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2589                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2590                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2591                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2593                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2594                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2595                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2597                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2598                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2599                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2601                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2602                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2603                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2604                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2605                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2606                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2607                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2608                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2609                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2610                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2611                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2612                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2613                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2615                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2616                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2617                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2619                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2620                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2621                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2622                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2623                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2624                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2626                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2627                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2628                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2629                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2630                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2631                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2632                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2633                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2634                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2635                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2636                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2637                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2638                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2639                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2640                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2641                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2642                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2643                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2644                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2645                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2646                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2647                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2648                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2649                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2650                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2651                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2652                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2653                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2654                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2655                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2656                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2657                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2658                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2659                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2660                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2661                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2662                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2663                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2664                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2665                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2666                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2667                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2668                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2669                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2670                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2671                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2672                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2673                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2674                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2675                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2676                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2677                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2678                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2679                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2680                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2681                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2682                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2683                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2684                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2685                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2686                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2687                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2688                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2689                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2690                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2691                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2693                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2694                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2695                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2696                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2697                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2699                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2700
2701                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2702                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2703                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2704                                 NUM_BANKS(ADDR_SURF_16_BANK));
2705                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2706                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2707                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2708                                 NUM_BANKS(ADDR_SURF_16_BANK));
2709                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2711                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2712                                 NUM_BANKS(ADDR_SURF_16_BANK));
2713                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2714                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2715                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2716                                 NUM_BANKS(ADDR_SURF_16_BANK));
2717                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2718                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2719                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2720                                 NUM_BANKS(ADDR_SURF_16_BANK));
2721                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2723                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2724                                 NUM_BANKS(ADDR_SURF_16_BANK));
2725                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2726                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2727                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2728                                 NUM_BANKS(ADDR_SURF_16_BANK));
2729                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2730                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2731                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2732                                 NUM_BANKS(ADDR_SURF_16_BANK));
2733                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2734                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2735                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2736                                 NUM_BANKS(ADDR_SURF_16_BANK));
2737                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2738                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2739                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2740                                  NUM_BANKS(ADDR_SURF_16_BANK));
2741                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2742                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2743                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2744                                  NUM_BANKS(ADDR_SURF_16_BANK));
2745                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2746                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2747                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2748                                  NUM_BANKS(ADDR_SURF_8_BANK));
2749                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2750                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2751                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2752                                  NUM_BANKS(ADDR_SURF_4_BANK));
2753                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2754                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2755                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2756                                  NUM_BANKS(ADDR_SURF_4_BANK));
2757
2758                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2759                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2760
2761                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2762                         if (reg_offset != 7)
2763                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2764
2765                 break;
2766         case CHIP_POLARIS11:
2767         case CHIP_POLARIS12:
2768                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2769                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2770                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2771                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2772                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2773                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2774                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2775                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2776                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2777                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2778                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2779                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2780                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2781                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2782                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2783                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2784                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2786                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2787                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2788                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2789                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2790                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2791                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2792                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2793                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2794                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2795                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2796                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2797                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2798                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2799                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2800                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2801                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2802                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2803                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2804                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2805                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2806                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2807                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2808                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2809                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2810                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2811                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2812                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2813                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2814                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2815                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2816                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2817                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2818                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2819                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2820                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2821                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2822                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2823                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2824                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2825                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2826                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2827                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2828                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2829                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2830                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2831                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2832                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2833                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2834                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2835                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2836                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2837                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2838                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2839                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2840                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2841                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2842                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2843                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2844                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2845                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2846                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2847                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2848                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2849                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2850                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2851                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2852                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2853                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2854                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2855                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2856                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2857                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2858                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2859                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2860                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2861                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2862                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2863                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2864                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2865                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2866                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2867                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2868                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2869                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2870                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2871                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2872                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2873                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2874                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2875                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2876                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2877                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2878                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2879                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2880                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2881                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2882                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2883                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2884                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2885                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2886                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2887                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2888                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2889                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2890
2891                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2892                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2893                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2894                                 NUM_BANKS(ADDR_SURF_16_BANK));
2895
2896                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2897                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2898                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2899                                 NUM_BANKS(ADDR_SURF_16_BANK));
2900
		/*
		 * NOTE(review): tail of a case arm whose label (and the
		 * matching modearray[]/mod2array[0..1] setup) is above this
		 * view.  Each mod2array entry packs a GB_MACROTILE_MODEn
		 * register value from bank-width/height, macro-tile aspect
		 * and bank-count fields.
		 */
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		/* index 7 is deliberately never written; the write loop below
		 * skips reg_offset 7 as well -- presumably that macrotile
		 * mode register is reserved on this ASIC (TODO confirm
		 * against the GFX8 register spec). */
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program every GB_TILE_MODEn register from the table. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program GB_MACROTILE_MODEn, skipping the unused index 7. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_POLARIS10:
		/*
		 * Polaris10 tiling tables.  Each modearray entry packs one
		 * GB_TILE_MODEn register value (array mode, pipe config,
		 * tile split / sample split, micro-tile mode); each mod2array
		 * entry packs one GB_MACROTILE_MODEn register value.  The
		 * values are hardware-defined constants -- do not "simplify".
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		/* index 7 is deliberately never written; the write loop below
		 * skips reg_offset 7 to match. */
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Program every GB_TILE_MODEn register from the table. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program GB_MACROTILE_MODEn, skipping the unused index 7. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_STONEY:
		/*
		 * Stoney tiling tables (2-pipe config, ADDR_SURF_P2
		 * throughout).  modearray indices 7, 12, 17 and 23 and
		 * mod2array index 7 are intentionally left unset; the write
		 * loops below skip exactly those register offsets.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		/* Program GB_TILE_MODEn, skipping the table indices that were
		 * never initialized above (7, 12, 17, 23). */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program GB_MACROTILE_MODEn, skipping the unused index 7. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	default:
		/* Unsupported ASICs get the CHIP_CARRIZO tables after a
		 * warning; the fallthrough is intentional. */
		dev_warn(adev->dev,
			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
			 adev->asic_type);
		/* fall through */

3347         case CHIP_CARRIZO:
3348                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3349                                 PIPE_CONFIG(ADDR_SURF_P2) |
3350                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3351                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3352                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3353                                 PIPE_CONFIG(ADDR_SURF_P2) |
3354                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3355                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3356                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3357                                 PIPE_CONFIG(ADDR_SURF_P2) |
3358                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3359                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3360                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3361                                 PIPE_CONFIG(ADDR_SURF_P2) |
3362                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3363                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3364                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3365                                 PIPE_CONFIG(ADDR_SURF_P2) |
3366                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3367                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3368                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3369                                 PIPE_CONFIG(ADDR_SURF_P2) |
3370                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3371                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3372                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3373                                 PIPE_CONFIG(ADDR_SURF_P2) |
3374                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3375                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3376                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3377                                 PIPE_CONFIG(ADDR_SURF_P2));
3378                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3379                                 PIPE_CONFIG(ADDR_SURF_P2) |
3380                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3381                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3382                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3383                                  PIPE_CONFIG(ADDR_SURF_P2) |
3384                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3385                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3386                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3387                                  PIPE_CONFIG(ADDR_SURF_P2) |
3388                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3389                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3390                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3391                                  PIPE_CONFIG(ADDR_SURF_P2) |
3392                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3393                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3394                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3395                                  PIPE_CONFIG(ADDR_SURF_P2) |
3396                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3397                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3398                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3399                                  PIPE_CONFIG(ADDR_SURF_P2) |
3400                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3401                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3402                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3403                                  PIPE_CONFIG(ADDR_SURF_P2) |
3404                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3405                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3406                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3407                                  PIPE_CONFIG(ADDR_SURF_P2) |
3408                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3409                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3410                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3411                                  PIPE_CONFIG(ADDR_SURF_P2) |
3412                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3413                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3414                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3415                                  PIPE_CONFIG(ADDR_SURF_P2) |
3416                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3417                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3418                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3419                                  PIPE_CONFIG(ADDR_SURF_P2) |
3420                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3421                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3422                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3423                                  PIPE_CONFIG(ADDR_SURF_P2) |
3424                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3425                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3426                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3427                                  PIPE_CONFIG(ADDR_SURF_P2) |
3428                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3429                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3430                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3431                                  PIPE_CONFIG(ADDR_SURF_P2) |
3432                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3433                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3434                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3435                                  PIPE_CONFIG(ADDR_SURF_P2) |
3436                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3437                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3438                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3439                                  PIPE_CONFIG(ADDR_SURF_P2) |
3440                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3441                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3442                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3443                                  PIPE_CONFIG(ADDR_SURF_P2) |
3444                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3445                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3446                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3447                                  PIPE_CONFIG(ADDR_SURF_P2) |
3448                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3449                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3450
3451                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3452                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3453                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3454                                 NUM_BANKS(ADDR_SURF_8_BANK));
3455                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3456                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3457                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3458                                 NUM_BANKS(ADDR_SURF_8_BANK));
3459                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3460                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3461                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3462                                 NUM_BANKS(ADDR_SURF_8_BANK));
3463                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3464                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3465                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3466                                 NUM_BANKS(ADDR_SURF_8_BANK));
3467                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3468                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3469                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3470                                 NUM_BANKS(ADDR_SURF_8_BANK));
3471                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3472                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3473                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3474                                 NUM_BANKS(ADDR_SURF_8_BANK));
3475                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3476                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3477                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3478                                 NUM_BANKS(ADDR_SURF_8_BANK));
3479                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3480                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3481                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3482                                 NUM_BANKS(ADDR_SURF_16_BANK));
3483                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3484                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3485                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3486                                 NUM_BANKS(ADDR_SURF_16_BANK));
3487                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3488                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3489                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3490                                  NUM_BANKS(ADDR_SURF_16_BANK));
3491                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3492                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3493                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3494                                  NUM_BANKS(ADDR_SURF_16_BANK));
3495                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3496                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3497                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3498                                  NUM_BANKS(ADDR_SURF_16_BANK));
3499                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3500                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3501                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3502                                  NUM_BANKS(ADDR_SURF_16_BANK));
3503                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3504                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3505                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3506                                  NUM_BANKS(ADDR_SURF_8_BANK));
3507
3508                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3509                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3510                             reg_offset != 23)
3511                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3512
3513                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3514                         if (reg_offset != 7)
3515                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3516
3517                 break;
3518         }
3519 }
3520
3521 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3522                                   u32 se_num, u32 sh_num, u32 instance)
3523 {
3524         u32 data;
3525
3526         if (instance == 0xffffffff)
3527                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3528         else
3529                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3530
3531         if (se_num == 0xffffffff)
3532                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3533         else
3534                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3535
3536         if (sh_num == 0xffffffff)
3537                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3538         else
3539                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3540
3541         WREG32(mmGRBM_GFX_INDEX, data);
3542 }
3543
/*
 * Route register accesses to the given micro-engine/pipe/queue via the
 * SRBM index registers; VMID is pinned to 0 here.
 */
static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q)
{
	vi_srbm_select(adev, me, pipe, q, 0);
}
3549
/*
 * Return a bitmap of the render backends that are active (i.e. not
 * disabled/harvested) for the SE/SH currently selected through
 * gfx_v8_0_select_se_sh().
 */
static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	/* a bit set in either register marks an RB that is disabled */
	data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
		RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);

	/* one mask bit per RB that a single shader array can have */
	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
					 adev->gfx.config.max_sh_per_se);

	/* invert the disable bits to get the active bitmap */
	return (~data) & mask;
}
3564
3565 static void
3566 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3567 {
3568         switch (adev->asic_type) {
3569         case CHIP_FIJI:
3570         case CHIP_VEGAM:
3571                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3572                           RB_XSEL2(1) | PKR_MAP(2) |
3573                           PKR_XSEL(1) | PKR_YSEL(1) |
3574                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3575                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3576                            SE_PAIR_YSEL(2);
3577                 break;
3578         case CHIP_TONGA:
3579         case CHIP_POLARIS10:
3580                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3581                           SE_XSEL(1) | SE_YSEL(1);
3582                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3583                            SE_PAIR_YSEL(2);
3584                 break;
3585         case CHIP_TOPAZ:
3586         case CHIP_CARRIZO:
3587                 *rconf |= RB_MAP_PKR0(2);
3588                 *rconf1 |= 0x0;
3589                 break;
3590         case CHIP_POLARIS11:
3591         case CHIP_POLARIS12:
3592                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3593                           SE_XSEL(1) | SE_YSEL(1);
3594                 *rconf1 |= 0x0;
3595                 break;
3596         case CHIP_STONEY:
3597                 *rconf |= 0x0;
3598                 *rconf1 |= 0x0;
3599                 break;
3600         default:
3601                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3602                 break;
3603         }
3604 }
3605
/*
 * Rewrite the raster configuration per shader engine when some render
 * backends have been harvested.  For each SE the SE/PKR/RB mapping
 * fields are remapped so that work is only routed to RBs present in
 * rb_mask, then the adjusted value is written with GRBM_GFX_INDEX
 * pointed at that SE.  Finally the broadcast index is restored.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* per-SE slice of the global RB mask (rb_per_se bits each) */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* if a whole SE pair has no RBs, point SE_PAIR_MAP at the other pair */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* if one SE of the pair is empty, remap SE_MAP to the live one */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* if one packer of this SE has no RBs, remap PKR_MAP */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			/* remap RB_MAP_PKR0 if one of packer 0's RB pair is gone */
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			/* same for packer 1's RB pair when present */
			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3714
/*
 * Probe which render backends are active on each SE/SH, program the
 * raster configuration (harvest-aware when RBs are missing), and cache
 * the per-SE/SH register values for later queries by userspace.
 * Caller context: takes grbm_idx_mutex itself.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* collect the active-RB bitmap of every SE/SH into one word */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* all RBs present: broadcast the config; otherwise remap per SE */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3771
3772 /**
3773  * gfx_v8_0_init_compute_vmid - gart enable
3774  *
3775  * @adev: amdgpu_device pointer
3776  *
3777  * Initialize compute vmid sh_mem registers
3778  *
3779  */
#define DEFAULT_SH_MEM_BASES    (0x6000)
/* VMIDs 8..15 are reserved for compute; 0..7 belong to graphics */
#define FIRST_COMPUTE_VMID      (8)
#define LAST_COMPUTE_VMID       (16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* same base for both the shared (low 16 bits) and private aperture */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 base > limit, i.e. the APE1 aperture is left unused */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* restore SRBM selection to VMID 0 */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3816
3817 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3818 {
3819         switch (adev->asic_type) {
3820         default:
3821                 adev->gfx.config.double_offchip_lds_buf = 1;
3822                 break;
3823         case CHIP_CARRIZO:
3824         case CHIP_STONEY:
3825                 adev->gfx.config.double_offchip_lds_buf = 0;
3826                 break;
3827         }
3828 }
3829
/*
 * One-time programming of GFX "golden" constant state: GRBM read
 * timeout, address-config mirrors, tiling tables, RB/raster setup,
 * per-VMID SH_MEM apertures, compute VMIDs, PA_SC FIFO sizes and SPI
 * arbitration priorities.
 */
static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	/* HDP and DMIF must see the same addressing layout as GB */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	/* program SH_MEM registers for every VMID */
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 (kernel): uncached default, base 0 */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			/* other VMIDs: non-coherent default, shared-aperture base */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		/* APE1 base > limit, i.e. the APE1 aperture is left unused */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);

}
3911
/*
 * Busy-wait (up to adev->usec_timeout microseconds per target) until the
 * RLC serdes CU masters on every SE/SH report idle, then wait for the
 * non-CU masters.  Logs and bails out on timeout rather than failing.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* restore broadcast + drop the lock before returning */
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* now wait for the SE/GC/TC non-CU masters to go idle as well */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3949
3950 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3951                                                bool enable)
3952 {
3953         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3954
3955         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3956         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3957         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3958         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3959
3960         WREG32(mmCP_INT_CNTL_RING0, tmp);
3961 }
3962
3963 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3964 {
3965         /* csib */
3966         WREG32(mmRLC_CSIB_ADDR_HI,
3967                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3968         WREG32(mmRLC_CSIB_ADDR_LO,
3969                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3970         WREG32(mmRLC_CSIB_LENGTH,
3971                         adev->gfx.rlc.clear_state_size);
3972 }
3973
/*
 * Scan the RLC register-list-format blob starting at ind_offset and,
 * for each entry, replace its raw index value with a small sequence
 * number, collecting the set of distinct index values seen
 * (unique_indices/indices_count) and the start offset of every run
 * (ind_start_offsets/offset_count).  A 0xFFFFFFFF word terminates a
 * run; the following word starts a new one.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	bool start_of_run = true;
	int pos;

	for (pos = ind_offset; pos < list_size; pos++) {
		int slot;

		if (start_of_run) {
			/* remember where this run begins */
			ind_start_offsets[*offset_count] = pos;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
			start_of_run = false;
		}

		if (register_list_format[pos] == 0xFFFFFFFF) {
			/* run terminator: next word opens a new run */
			start_of_run = true;
			continue;
		}

		/* skip past offset and value to land on the index field */
		pos += 2;

		/* look the index up in the unique list */
		for (slot = 0; slot < *indices_count; slot++) {
			if (unique_indices[slot] == register_list_format[pos])
				break;
		}

		/* not seen before: append it */
		if (slot >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[pos];
			slot = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* rewrite the raw index as its sequence number */
		register_list_format[pos] = slot;
	}
}
4023
/*
 * gfx_v8_0_init_save_restore_list - upload the RLC save/restore lists
 *
 * Copies the ucode-provided register list format blob, normalizes it with
 * gfx_v8_0_parse_ind_reg_list() (which rewrites index words in place),
 * then programs:
 *   - the direct save/restore list into RLC SRM ARAM,
 *   - the parsed indirect list, its size, and the per-sub-list start
 *     offsets into RLC GPM scratch,
 *   - the unique index values into the SRM index control register pairs.
 *
 * Returns 0 on success or -ENOMEM if the temporary copy cannot be
 * allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* work on a copy: parsing modifies the list in place */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list: stream into ARAM with auto-increment */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* list size is written as dword-count/2 — presumably counted in
	 * register/value pairs; confirm against the RLC ucode format docs */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets of the indirect sub-lists */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices: low 18 bits go to the ADDR register, bits 20+
	 * to the DATA register of each CNTL pair */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}
4087
/* Enable the RLC save/restore machine (SRM). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4092
/*
 * Program static power-gating timing parameters.  The delay constants are
 * hardware-tuned magic values; units are not visible here (NOTE(review):
 * presumably RLC clock cycles — confirm before changing).
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	/* idle poll count for CP ring-buffer write-pointer polling */
	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	/* power up/down, command propagation and memory sleep delays */
	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	/* idle threshold before the RLC auto-saves GRBM registers */
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4109
/* Toggle SMU clock slow-down during power-up sequencing. */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4115
/* Toggle SMU clock slow-down during power-down sequencing. */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4121
/*
 * Enable/disable CP power gating.  Note the inverted polarity: the
 * hardware field is CP_PG_DISABLE, so enable==true writes 0.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4126
4127 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4128 {
4129         if ((adev->asic_type == CHIP_CARRIZO) ||
4130             (adev->asic_type == CHIP_STONEY)) {
4131                 gfx_v8_0_init_csb(adev);
4132                 gfx_v8_0_init_save_restore_list(adev);
4133                 gfx_v8_0_enable_save_restore_machine(adev);
4134                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4135                 gfx_v8_0_init_power_gating(adev);
4136                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4137         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4138                    (adev->asic_type == CHIP_POLARIS12) ||
4139                    (adev->asic_type == CHIP_VEGAM)) {
4140                 gfx_v8_0_init_csb(adev);
4141                 gfx_v8_0_init_save_restore_list(adev);
4142                 gfx_v8_0_enable_save_restore_machine(adev);
4143                 gfx_v8_0_init_power_gating(adev);
4144         }
4145
4146 }
4147
/* Halt the RLC F32 core, mask GUI-idle interrupts and wait for the
 * RLC serdes masters to go idle. */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4155
/* Pulse the RLC soft reset, allowing 50us for assert and release to
 * settle. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4164
/* Re-enable the RLC F32 core and (on dGPUs) the GUI-idle interrupts. */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* on APUs (e.g. Carrizo) the CP interrupt is enabled only after
	 * the CP itself has been initialized */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4175
/*
 * Full RLC restart: stop, soft-reset, reprogram power gating, start.
 * Always returns 0.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);
	gfx_v8_0_rlc_start(adev);

	return 0;
}
4185
4186 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4187 {
4188         int i;
4189         u32 tmp = RREG32(mmCP_ME_CNTL);
4190
4191         if (enable) {
4192                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4193                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4194                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4195         } else {
4196                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4197                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4198                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4199                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4200                         adev->gfx.gfx_ring[i].ready = false;
4201         }
4202         WREG32(mmCP_ME_CNTL, tmp);
4203         udelay(50);
4204 }
4205
4206 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4207 {
4208         u32 count = 0;
4209         const struct cs_section_def *sect = NULL;
4210         const struct cs_extent_def *ext = NULL;
4211
4212         /* begin clear state */
4213         count += 2;
4214         /* context control state */
4215         count += 3;
4216
4217         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4218                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4219                         if (sect->id == SECT_CONTEXT)
4220                                 count += 2 + ext->reg_count;
4221                         else
4222                                 return 0;
4223                 }
4224         }
4225         /* pa_sc_raster_config/pa_sc_raster_config1 */
4226         count += 4;
4227         /* end clear state */
4228         count += 2;
4229         /* clear state */
4230         count += 2;
4231
4232         return count;
4233 }
4234
/*
 * gfx_v8_0_cp_gfx_start - initialize the CP and emit the clear-state
 * preamble on the gfx ring.
 *
 * The PM4 stream written here must match the dword count returned by
 * gfx_v8_0_get_csb_size() (plus 4 for the SET_BASE packet).
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer preamble */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every context-register extent of the golden clear state */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* pa_sc_raster_config/pa_sc_raster_config1 from SE0/SH0 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
/*
 * Configure the CP gfx ring-buffer doorbell: offset and enable when the
 * ring uses one, disabled otherwise.  On dGPUs the doorbell aperture
 * range reserved for the gfx ring is programmed as well.
 */
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
						DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	/* the doorbell range below is dGPU-only */
	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}
4331
/*
 * gfx_v8_0_cp_gfx_resume - program CP_RB0 and bring up the gfx ring.
 *
 * Sets up the ring-buffer control, read/write pointers, writeback
 * addresses, base address and doorbell, then starts the CP and runs a
 * ring test.  The write sequence is order-sensitive (RPTR_WR_ENA is set
 * while the pointers are programmed and cleared afterwards).
 * Returns the ring-test result (0 on success).
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	/* drop RB_RPTR_WR_ENA again now that the pointers are latched */
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4389
4390 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4391 {
4392         int i;
4393
4394         if (enable) {
4395                 WREG32(mmCP_MEC_CNTL, 0);
4396         } else {
4397                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4398                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4399                         adev->gfx.compute_ring[i].ready = false;
4400                 adev->gfx.kiq.ring.ready = false;
4401         }
4402         udelay(50);
4403 }
4404
/* KIQ functions */

/* Register this ring's me/pipe/queue as the KIQ with the RLC. */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	/* the queue id is written first, then bit 7 in a second write —
	 * presumably a valid/activate bit; confirm against RLC docs */
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4419
/*
 * gfx_v8_0_kiq_kcq_enable - map all compute queues (KCQs) via the KIQ.
 *
 * Emits one SET_RESOURCES packet carrying the bitmap of usable MEC
 * queues, followed by a MAP_QUEUES packet per compute ring, then runs a
 * KIQ ring test to confirm the packets were consumed.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
	uint64_t queue_mask = 0;
	int r, i;

	/* build the queue mask from the MEC queue bitmap */
	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of queue_mask needs updating */
		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
			break;
		}

		queue_mask |= (1ull << i);
	}

	/* 8 dwords for SET_RESOURCES + 8 per MAP_QUEUES packet */
	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
	if (r) {
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
		return r;
	}
	/* set resources */
	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);

		/* map queues */
		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring,
				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	}

	r = amdgpu_ring_test_ring(kiq_ring);
	if (r) {
		DRM_ERROR("KCQ enable failed\n");
		kiq_ring->ready = false;
	}
	return r;
}
4483
/*
 * Request dequeue of the selected HQD and wait up to adev->usec_timeout
 * microseconds for it to go inactive.  NOTE(review): the caller appears
 * to be responsible for selecting the target queue via SRBM first —
 * confirm at the call sites.  The dequeue request and queue pointers are
 * cleared regardless of outcome.
 * Returns 0 on success, -ETIMEDOUT if the queue stays active.
 */
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
	int i, r = 0;

	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);

	return r;
}
4504
/*
 * gfx_v8_0_mqd_init - populate the memory queue descriptor for a ring.
 *
 * Fills ring->mqd_ptr with the EOP buffer, MQD/HQD base addresses,
 * doorbell setup, writeback addresses and the default values of the
 * remaining HQD registers.  Nothing is written to hardware here; the
 * image is committed later (e.g. by gfx_v8_0_mqd_commit() or via KIQ
 * MAP_QUEUES).  Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;
	/* address of the dynamic CU mask stored in the same allocation */
	mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
						     + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* set MTYPE */
	tmp = RREG32(mmCP_HQD_IB_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ib_control = tmp;

	tmp = RREG32(mmCP_HQD_IQ_TIMER);
	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
	mqd->cp_hqd_iq_timer = tmp;

	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
	mqd->cp_hqd_ctx_save_control = tmp;

	/* defaults: snapshot the remaining HQD registers as-is */
	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4647
/*
 * gfx_v8_0_mqd_commit - write an MQD image into the selected HQD.
 *
 * The caller selects the target queue via vi_srbm_select() under
 * srbm_mutex (see gfx_v8_0_kiq_init_queue()).  Registers are written in
 * three ranges so that CP_HQD_ACTIVE is programmed last, which activates
 * the queue.  Always returns 0.
 */
int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD: this range ends at CP_HQD_ACTIVE */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}
4684
/*
 * Initialize the KIQ ring's MQD and commit it to hardware.  On a GPU
 * reset the previously backed-up MQD image is restored instead of being
 * rebuilt.  Always returns 0.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* the KIQ backup lives in the slot after all compute rings */
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	gfx_v8_0_kiq_setting(ring);

	if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		/* first init: build the MQD from scratch, commit it, and
		 * keep a backup copy for later GPU resets */
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	}

	return 0;
}
4723
/*
 * gfx_v8_0_kcq_init_queue - initialize a compute (KCQ) ring's MQD
 *
 * First bring-up: zero the MQD, build it via gfx_v8_0_mqd_init() and back it
 * up for later GPU-reset restore.  Unlike the KIQ path there is no
 * gfx_v8_0_mqd_commit() here -- presumably the KIQ maps these queues later
 * (see gfx_v8_0_kiq_kcq_enable() in gfx_v8_0_kcq_resume()).
 * GPU reset: restore the MQD from backup and clear the ring.
 * Suspend/resume (neither flag): just clear the ring contents.
 *
 * Returns 0 (no failure paths in this function).
 */
static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	/* backup slot index == position of this ring in compute_ring[] */
	int mqd_idx = ring - &adev->gfx.compute_ring[0];

	if (!adev->in_gpu_reset && !adev->in_suspend) {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		/* enable all CUs / RBs by default */
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		/* keep a pristine copy for restore after GPU reset */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
	} else {
		amdgpu_ring_clear_ring(ring);
	}
	return 0;
}
4754
/*
 * gfx_v8_0_set_mec_doorbell_range - program the MEC doorbell aperture
 *
 * Restricts the MEC doorbell window to [KIQ .. MEC ring 7] and globally
 * enables doorbells in CP_PQ_STATUS.  The range registers are only written
 * on ASICs newer than Tonga; presumably older parts lack these registers or
 * rely on defaults -- TODO confirm against the register database.
 */
static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
{
	if (adev->asic_type > CHIP_TONGA) {
		/* doorbell offsets are dword-indexed, hence the << 2 */
		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
	}
	/* enable doorbells */
	WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
}
4764
4765 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4766 {
4767         struct amdgpu_ring *ring;
4768         int r;
4769
4770         ring = &adev->gfx.kiq.ring;
4771
4772         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4773         if (unlikely(r != 0))
4774                 return r;
4775
4776         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4777         if (unlikely(r != 0))
4778                 return r;
4779
4780         gfx_v8_0_kiq_init_queue(ring);
4781         amdgpu_bo_kunmap(ring->mqd_obj);
4782         ring->mqd_ptr = NULL;
4783         amdgpu_bo_unreserve(ring->mqd_obj);
4784         ring->ready = true;
4785         return 0;
4786 }
4787
/*
 * gfx_v8_0_kcq_resume - bring up all compute (KCQ) rings
 *
 * Enables the MEC, initializes every compute ring's MQD, programs the MEC
 * doorbell range, asks the KIQ to map the queues, and finally ring-tests
 * each KCQ, marking it ready only when the test passes.
 *
 * Returns 0 on success or a negative error code.
 * NOTE(review): in the final test loop only the last ring's test result is
 * propagated to the caller; earlier failures merely clear that ring's
 * ready flag -- confirm this is intentional.
 */
static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kcq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		/* reservation is dropped whether or not kmap succeeded */
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;
	}

	gfx_v8_0_set_mec_doorbell_range(adev);

	/* have the KIQ map all KCQs onto their HQDs */
	r = gfx_v8_0_kiq_kcq_enable(adev);
	if (r)
		goto done;

	/* Test KCQs */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];
		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}
4830
4831 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4832 {
4833         int r;
4834
4835         if (!(adev->flags & AMD_IS_APU))
4836                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4837
4838         r = gfx_v8_0_kiq_resume(adev);
4839         if (r)
4840                 return r;
4841
4842         r = gfx_v8_0_cp_gfx_resume(adev);
4843         if (r)
4844                 return r;
4845
4846         r = gfx_v8_0_kcq_resume(adev);
4847         if (r)
4848                 return r;
4849         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4850
4851         return 0;
4852 }
4853
/* Enable/disable both CP engines: GFX first, then compute (MEC). */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4859
/*
 * gfx_v8_0_hw_init - IP-block hw_init hook
 *
 * Applies the golden register settings and constant state, then resumes
 * the RLC followed by the command processor.
 *
 * Returns 0 on success or the failing stage's error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_constants_init(adev);

	/* RLC must be up before the CP is resumed */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
4876
4877 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4878 {
4879         int r, i;
4880         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4881
4882         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4883         if (r)
4884                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4885
4886         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4887                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4888
4889                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4890                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4891                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4892                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4893                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4894                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4895                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4896                 amdgpu_ring_write(kiq_ring, 0);
4897                 amdgpu_ring_write(kiq_ring, 0);
4898                 amdgpu_ring_write(kiq_ring, 0);
4899         }
4900         r = amdgpu_ring_test_ring(kiq_ring);
4901         if (r)
4902                 DRM_ERROR("KCQ disable failed\n");
4903
4904         return r;
4905 }
4906
4907 static bool gfx_v8_0_is_idle(void *handle)
4908 {
4909         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4910
4911         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4912                 || RREG32(mmGRBM_STATUS2) != 0x8)
4913                 return false;
4914         else
4915                 return true;
4916 }
4917
4918 static bool gfx_v8_0_rlc_is_idle(void *handle)
4919 {
4920         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4921
4922         if (RREG32(mmGRBM_STATUS2) != 0x8)
4923                 return false;
4924         else
4925                 return true;
4926 }
4927
4928 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4929 {
4930         unsigned int i;
4931         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4932
4933         for (i = 0; i < adev->usec_timeout; i++) {
4934                 if (gfx_v8_0_rlc_is_idle(handle))
4935                         return 0;
4936
4937                 udelay(1);
4938         }
4939         return -ETIMEDOUT;
4940 }
4941
4942 static int gfx_v8_0_wait_for_idle(void *handle)
4943 {
4944         unsigned int i;
4945         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4946
4947         for (i = 0; i < adev->usec_timeout; i++) {
4948                 if (gfx_v8_0_is_idle(handle))
4949                         return 0;
4950
4951                 udelay(1);
4952         }
4953         return -ETIMEDOUT;
4954 }
4955
/*
 * gfx_v8_0_hw_fini - IP-block hw_fini hook
 *
 * Drops the GFX interrupt references, unmaps the compute queues, and --
 * except under SR-IOV, where the host owns the hardware -- halts the CP
 * and RLC from within RLC safe mode.  Halting is skipped (with an error
 * log) if the respective unit never reaches idle.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

	/* disable KCQ to avoid CPC touch memory not valid anymore */
	gfx_v8_0_kcq_disable(adev);

	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, shouldn't do anything.\n");
		return 0;
	}
	adev->gfx.rlc.funcs->enter_safe_mode(adev);
	/* wait_for_idle() returns 0 on idle, so '!' means "is idle" */
	if (!gfx_v8_0_wait_for_idle(adev))
		gfx_v8_0_cp_enable(adev, false);
	else
		pr_err("cp is busy, skip halt cp\n");
	if (!gfx_v8_0_wait_for_rlc_idle(adev))
		gfx_v8_0_rlc_stop(adev);
	else
		pr_err("rlc is busy, skip halt rlc\n");
	adev->gfx.rlc.funcs->exit_safe_mode(adev);
	return 0;
}
4986
/* PM suspend hook: tearing the block down is the same as hw_fini. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
4991
/* PM resume hook: bringing the block back up is the same as hw_init. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
4996
/*
 * gfx_v8_0_check_soft_reset - decide whether a GFX soft reset is needed
 *
 * Inspects GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS for busy/pending bits
 * and accumulates the matching GRBM/SRBM soft-reset request masks.  The
 * masks are cached in adev->gfx.{grbm,srbm}_soft_reset for the
 * pre/soft/post reset hooks that run afterwards.
 *
 * Returns true when any reset bit was flagged, false otherwise.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS */
	/* any busy graphics pipeline unit -> reset CP + GFX (and GRBM) */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	/* stuck RLC -> reset RLC */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* any busy CP micro-engine (fetcher/compute/gfx) -> reset all three */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	/* cache the masks for the pre/soft/post reset stages */
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5058
5059 static int gfx_v8_0_pre_soft_reset(void *handle)
5060 {
5061         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5062         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5063
5064         if ((!adev->gfx.grbm_soft_reset) &&
5065             (!adev->gfx.srbm_soft_reset))
5066                 return 0;
5067
5068         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5069         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5070
5071         /* stop the rlc */
5072         gfx_v8_0_rlc_stop(adev);
5073
5074         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5075             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5076                 /* Disable GFX parsing/prefetching */
5077                 gfx_v8_0_cp_gfx_enable(adev, false);
5078
5079         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5080             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5081             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5082             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5083                 int i;
5084
5085                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5086                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5087
5088                         mutex_lock(&adev->srbm_mutex);
5089                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5090                         gfx_v8_0_deactivate_hqd(adev, 2);
5091                         vi_srbm_select(adev, 0, 0, 0, 0);
5092                         mutex_unlock(&adev->srbm_mutex);
5093                 }
5094                 /* Disable MEC parsing/prefetching */
5095                 gfx_v8_0_cp_compute_enable(adev, false);
5096         }
5097
5098        return 0;
5099 }
5100
/*
 * gfx_v8_0_soft_reset - pulse the GRBM/SRBM soft-reset bits
 *
 * Applies the masks cached by gfx_v8_0_check_soft_reset().  Sequence:
 * stall/clear GFX via GMCON_DEBUG, assert then deassert the GRBM reset
 * bits, then the SRBM reset bits, release the GMCON stall, and give the
 * hardware time to settle.  The read-back after each WREG32 posts the
 * write before the following udelay.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall the memory controller's view of GFX during the reset */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		/* deassert */
		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		/* deassert */
		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GMCON stall */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5162
/*
 * gfx_v8_0_post_soft_reset - bring the GFX block back after a soft reset
 *
 * Mirror of gfx_v8_0_pre_soft_reset(): for a CP reset, deactivate any
 * still-mapped compute HQDs and resume KIQ + KCQs; for a CP/GFX reset,
 * resume the GFX ring; finally restart the RLC.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			/* 2 = dequeue request type -- see gfx_v8_0_deactivate_hqd() */
			gfx_v8_0_deactivate_hqd(adev, 2);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
		gfx_v8_0_kiq_resume(adev);
		gfx_v8_0_kcq_resume(adev);
	}

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	gfx_v8_0_rlc_start(adev);

	return 0;
}
5202
5203 /**
5204  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5205  *
5206  * @adev: amdgpu_device pointer
5207  *
5208  * Fetches a GPU clock counter snapshot.
5209  * Returns the 64 bit clock counter snapshot.
5210  */
5211 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5212 {
5213         uint64_t clock;
5214
5215         mutex_lock(&adev->gfx.gpu_clock_mutex);
5216         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5217         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5218                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5219         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5220         return clock;
5221 }
5222
/*
 * gfx_v8_0_ring_emit_gds_switch - emit GDS state for a VMID switch
 *
 * Emits four WRITE_DATA packets that program the per-VMID GDS base/size,
 * GWS and OA allocations.  Each packet writes one register from the
 * amdgpu_gds_reg_offset[vmid] table; the packet layout is:
 * header, control, reg offset, addr-hi (0 for register writes), value.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base are packed into a single register value */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: build a contiguous bitmask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5261
5262 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5263 {
5264         WREG32(mmSQ_IND_INDEX,
5265                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5266                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5267                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5268                 (SQ_IND_INDEX__FORCE_READ_MASK));
5269         return RREG32(mmSQ_IND_DATA);
5270 }
5271
5272 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5273                            uint32_t wave, uint32_t thread,
5274                            uint32_t regno, uint32_t num, uint32_t *out)
5275 {
5276         WREG32(mmSQ_IND_INDEX,
5277                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5278                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5279                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5280                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5281                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5282                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5283         while (num--)
5284                 *(out++) = RREG32(mmSQ_IND_DATA);
5285 }
5286
5287 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5288 {
5289         /* type 0 wave data */
5290         dst[(*no_fields)++] = 0;
5291         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5292         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5293         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5294         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5295         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5296         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5297         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5298         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5299         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5300         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5301         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5302         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5303         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5304         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5305         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5306         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5307         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5308         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5309 }
5310
5311 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5312                                      uint32_t wave, uint32_t start,
5313                                      uint32_t size, uint32_t *dst)
5314 {
5315         wave_read_regs(
5316                 adev, simd, wave, 0,
5317                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5318 }
5319
5320
/* GFX v8 callbacks plugged into adev->gfx.funcs in gfx_v8_0_early_init() */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
};
5328
/*
 * gfx_v8_0_early_init - IP-block early_init hook
 *
 * Sets ring counts and installs the gfx/ring/irq/gds/rlc function tables
 * before any hardware is touched.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5343
/*
 * gfx_v8_0_late_init - IP-block late_init hook
 *
 * Takes references on the privileged-register/instruction, CP ECC and SQ
 * interrupts and runs the EDC GPR workarounds (which need the IB pool,
 * hence late init rather than hw_init).
 *
 * Returns 0 on success or the first failing call's error code.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
	if (r) {
		DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
		return r;
	}

	r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
	if (r) {
		DRM_ERROR(
			"amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
			r);
		return r;
	}

	return 0;
}
5378
5379 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5380                                                        bool enable)
5381 {
5382         if (((adev->asic_type == CHIP_POLARIS11) ||
5383             (adev->asic_type == CHIP_POLARIS12) ||
5384             (adev->asic_type == CHIP_VEGAM)) &&
5385             adev->powerplay.pp_funcs->set_powergating_by_smu)
5386                 /* Send msg to SMU via Powerplay */
5387                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5388
5389         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5390 }
5391
/* Toggle dynamic per-CU powergating in RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5397
/* Toggle "quick" powergating in RLC_PG_CNTL (Polaris11-class ASICs). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5403
/* Toggle coarse-grain GFX powergating in RLC_PG_CNTL (Carrizo/Stoney). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5409
/*
 * Toggle GFX pipeline powergating in RLC_PG_CNTL (Carrizo/Stoney).
 * When disabling, a dummy register read forces GFX out of its gated
 * state so subsequent accesses hit powered-up hardware.
 */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5419
5420 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5421                                           bool enable)
5422 {
5423         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5424                 cz_enable_gfx_cg_power_gating(adev, true);
5425                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5426                         cz_enable_gfx_pipeline_power_gating(adev, true);
5427         } else {
5428                 cz_enable_gfx_cg_power_gating(adev, false);
5429                 cz_enable_gfx_pipeline_power_gating(adev, false);
5430         }
5431 }
5432
/*
 * gfx_v8_0_set_powergating_state - IP-block set_powergating_state hook
 *
 * Applies per-ASIC powergating policy.  When any of SMG/RLC_SMU_HS/CP/DMG
 * is supported, the updates are bracketed by RLC safe mode.  A no-op under
 * SR-IOV, where the host controls powergating.
 *
 * Returns 0.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		adev->gfx.rlc.funcs->enter_safe_mode(adev);
	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:

		/* SCK slow-down follows RLC_SMU_HS support unconditionally
		 * of the requested state */
		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
			cz_enable_sck_slow_down_on_power_up(adev, true);
			cz_enable_sck_slow_down_on_power_down(adev, true);
		} else {
			cz_enable_sck_slow_down_on_power_up(adev, false);
			cz_enable_sck_slow_down_on_power_down(adev, false);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
			cz_enable_cp_power_gating(adev, true);
		else
			cz_enable_cp_power_gating(adev, false);

		cz_update_gfx_cg_power_gating(adev, enable);

		/* static then dynamic per-CU PG, each only when supported
		 * AND gating was requested */
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		adev->gfx.rlc.funcs->exit_safe_mode(adev);
	return 0;
}
5503
/* Report the clock-gating features currently active in hardware by
 * decoding the relevant override/enable registers into AMD_CG_SUPPORT_*
 * bits OR'd into *flags.
 */
static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	/* Under SR-IOV the host owns CG, so start from an empty report.
	 * NOTE(review): the register reads below still execute and may set
	 * bits afterwards — confirm this is intentional.
	 */
	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG: active when the CPF override is clear */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS (same register as CGCG) */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS: active when the override is clear */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS (RLC memory light sleep implies MGLS) */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS (CP memory light sleep implies MGLS) */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}
5545
5546 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5547                                      uint32_t reg_addr, uint32_t cmd)
5548 {
5549         uint32_t data;
5550
5551         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5552
5553         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5554         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5555
5556         data = RREG32(mmRLC_SERDES_WR_CTRL);
5557         if (adev->asic_type == CHIP_STONEY)
5558                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5559                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5560                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5561                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5562                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5563                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5564                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5565                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5566                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5567         else
5568                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5569                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5570                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5571                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5572                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5573                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5574                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5575                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5576                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5577                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5578                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5579         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5580                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5581                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5582                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5583
5584         WREG32(mmRLC_SERDES_WR_CTRL, data);
5585 }
5586
5587 #define MSG_ENTER_RLC_SAFE_MODE     1
5588 #define MSG_EXIT_RLC_SAFE_MODE      0
5589 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5590 #define RLC_GPR_REG2__REQ__SHIFT 0
5591 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5592 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5593
/* Ask the RLC firmware to enter safe mode so CG/PG registers can be
 * reprogrammed, then wait for the handshake to complete.  No-op when the
 * RLC is not running or when neither CGCG nor MGCG is supported.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	/* Nothing to hand-shake with if the RLC F32 core is disabled. */
	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* Request safe mode: CMD=1, MESSAGE=enter(1).
		 * NOTE(review): data still carries bits read from RLC_CNTL
		 * when written to RLC_SAFE_MODE — presumably harmless, but
		 * worth confirming against the register layout.
		 */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* Wait until both GFX clock and power report "on". */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* Wait for the RLC to ack the request (CMD bit clears). */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5627
/* Counterpart of iceland_enter_rlc_safe_mode(): release the RLC from
 * safe mode and wait for the command to be acknowledged.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	/* No handshake possible with the RLC F32 core disabled. */
	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* CMD=1 with MESSAGE=exit(0) requests safe-mode exit. */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* Wait for the CMD bit to clear.  NOTE(review): this loop also runs
	 * when no exit request was issued above; it then just observes the
	 * already-clear bit and falls through.
	 */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5652
/* RLC safe-mode entry/exit callbacks following the Iceland-style
 * RLC_SAFE_MODE handshake.
 */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5657
/* Enable or disable medium-grain clock gating (MGCG) plus the related
 * light-sleep (MGLS) and tree-shade (CGTS) features.  The numbered steps
 * follow a fixed hardware programming sequence; all of it runs with the
 * RLC parked in safe mode.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: drop the overrides that block
		 * MGCG (APUs additionally keep the GRBM override set).
		 */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* LS override only lifted when both MGLS and CGTS_LS
			 * are supported.
			 */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5761
/* Enable or disable coarse-grain clock gating (CGCG) and, when supported,
 * coarse-grain light sleep (CGLS).  The serdes commands broadcast the
 * override changes to all CUs; runs with the RLC parked in safe mode.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - clear the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			/* also lift the CGLS override */
			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
		/* enable interrupts again for PG */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5854 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5855                                             bool enable)
5856 {
5857         if (enable) {
5858                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5859                  * ===  MGCG + MGLS + TS(CG/LS) ===
5860                  */
5861                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5862                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5863         } else {
5864                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5865                  * ===  CGCG + CGLS ===
5866                  */
5867                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5868                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5869         }
5870         return 0;
5871 }
5872
5873 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5874                                           enum amd_clockgating_state state)
5875 {
5876         uint32_t msg_id, pp_state = 0;
5877         uint32_t pp_support_state = 0;
5878
5879         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5880                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5881                         pp_support_state = PP_STATE_SUPPORT_LS;
5882                         pp_state = PP_STATE_LS;
5883                 }
5884                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5885                         pp_support_state |= PP_STATE_SUPPORT_CG;
5886                         pp_state |= PP_STATE_CG;
5887                 }
5888                 if (state == AMD_CG_STATE_UNGATE)
5889                         pp_state = 0;
5890
5891                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5892                                 PP_BLOCK_GFX_CG,
5893                                 pp_support_state,
5894                                 pp_state);
5895                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5896                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5897         }
5898
5899         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5900                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5901                         pp_support_state = PP_STATE_SUPPORT_LS;
5902                         pp_state = PP_STATE_LS;
5903                 }
5904
5905                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5906                         pp_support_state |= PP_STATE_SUPPORT_CG;
5907                         pp_state |= PP_STATE_CG;
5908                 }
5909
5910                 if (state == AMD_CG_STATE_UNGATE)
5911                         pp_state = 0;
5912
5913                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5914                                 PP_BLOCK_GFX_MG,
5915                                 pp_support_state,
5916                                 pp_state);
5917                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5918                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5919         }
5920
5921         return 0;
5922 }
5923
5924 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5925                                           enum amd_clockgating_state state)
5926 {
5927
5928         uint32_t msg_id, pp_state = 0;
5929         uint32_t pp_support_state = 0;
5930
5931         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5932                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5933                         pp_support_state = PP_STATE_SUPPORT_LS;
5934                         pp_state = PP_STATE_LS;
5935                 }
5936                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5937                         pp_support_state |= PP_STATE_SUPPORT_CG;
5938                         pp_state |= PP_STATE_CG;
5939                 }
5940                 if (state == AMD_CG_STATE_UNGATE)
5941                         pp_state = 0;
5942
5943                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5944                                 PP_BLOCK_GFX_CG,
5945                                 pp_support_state,
5946                                 pp_state);
5947                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5948                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5949         }
5950
5951         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5952                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5953                         pp_support_state = PP_STATE_SUPPORT_LS;
5954                         pp_state = PP_STATE_LS;
5955                 }
5956                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5957                         pp_support_state |= PP_STATE_SUPPORT_CG;
5958                         pp_state |= PP_STATE_CG;
5959                 }
5960                 if (state == AMD_CG_STATE_UNGATE)
5961                         pp_state = 0;
5962
5963                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5964                                 PP_BLOCK_GFX_3D,
5965                                 pp_support_state,
5966                                 pp_state);
5967                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5968                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5969         }
5970
5971         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5972                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5973                         pp_support_state = PP_STATE_SUPPORT_LS;
5974                         pp_state = PP_STATE_LS;
5975                 }
5976
5977                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5978                         pp_support_state |= PP_STATE_SUPPORT_CG;
5979                         pp_state |= PP_STATE_CG;
5980                 }
5981
5982                 if (state == AMD_CG_STATE_UNGATE)
5983                         pp_state = 0;
5984
5985                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5986                                 PP_BLOCK_GFX_MG,
5987                                 pp_support_state,
5988                                 pp_state);
5989                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5990                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5991         }
5992
5993         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5994                 pp_support_state = PP_STATE_SUPPORT_LS;
5995
5996                 if (state == AMD_CG_STATE_UNGATE)
5997                         pp_state = 0;
5998                 else
5999                         pp_state = PP_STATE_LS;
6000
6001                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6002                                 PP_BLOCK_GFX_RLC,
6003                                 pp_support_state,
6004                                 pp_state);
6005                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6006                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6007         }
6008
6009         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6010                 pp_support_state = PP_STATE_SUPPORT_LS;
6011
6012                 if (state == AMD_CG_STATE_UNGATE)
6013                         pp_state = 0;
6014                 else
6015                         pp_state = PP_STATE_LS;
6016                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6017                         PP_BLOCK_GFX_CP,
6018                         pp_support_state,
6019                         pp_state);
6020                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
6021                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
6022         }
6023
6024         return 0;
6025 }
6026
6027 static int gfx_v8_0_set_clockgating_state(void *handle,
6028                                           enum amd_clockgating_state state)
6029 {
6030         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6031
6032         if (amdgpu_sriov_vf(adev))
6033                 return 0;
6034
6035         switch (adev->asic_type) {
6036         case CHIP_FIJI:
6037         case CHIP_CARRIZO:
6038         case CHIP_STONEY:
6039                 gfx_v8_0_update_gfx_clock_gating(adev,
6040                                                  state == AMD_CG_STATE_GATE);
6041                 break;
6042         case CHIP_TONGA:
6043                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6044                 break;
6045         case CHIP_POLARIS10:
6046         case CHIP_POLARIS11:
6047         case CHIP_POLARIS12:
6048         case CHIP_VEGAM:
6049                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6050                 break;
6051         default:
6052                 break;
6053         }
6054         return 0;
6055 }
6056
6057 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6058 {
6059         return ring->adev->wb.wb[ring->rptr_offs];
6060 }
6061
6062 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6063 {
6064         struct amdgpu_device *adev = ring->adev;
6065
6066         if (ring->use_doorbell)
6067                 /* XXX check if swapping is necessary on BE */
6068                 return ring->adev->wb.wb[ring->wptr_offs];
6069         else
6070                 return RREG32(mmCP_RB0_WPTR);
6071 }
6072
6073 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6074 {
6075         struct amdgpu_device *adev = ring->adev;
6076
6077         if (ring->use_doorbell) {
6078                 /* XXX check if swapping is necessary on BE */
6079                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6080                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6081         } else {
6082                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6083                 (void)RREG32(mmCP_RB0_WPTR);
6084         }
6085 }
6086
/* Emit a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and polls
 * GPU_HDP_FLUSH_DONE until the bit for this ring's engine matches,
 * i.e. an HDP flush fence inside the command stream.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		/* Pick the done bit for this ME/pipe.  NOTE(review): the
		 * CP2/CP6 base masks presumably map to MEC engines 1 and 2 —
		 * confirm against the GPU_HDP_FLUSH_DONE layout.
		 */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6119
/* Emit a VS partial flush followed by a VGT flush via two EVENT_WRITE
 * packets.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6130
/* Emit an indirect buffer on the GFX ring.  CE IBs use
 * INDIRECT_BUFFER_CONST, DE IBs plain INDIRECT_BUFFER.  For preemptible
 * SR-IOV submissions the pre-enable bit is set and DE metadata is
 * emitted before the IB packet.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vmid, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* DW count of the IB plus the VMID in bits 31:24. */
	control |= ib->length_dw | (vmid << 24);

	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
		control |= INDIRECT_BUFFER_PRE_ENB(1);

		/* DE metadata only for the DE (non-CE) IB. */
		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
			gfx_v8_0_ring_emit_de_meta(ring);
	}

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6160
/*
 * Emit an indirect buffer on a compute ring.
 *
 * Unlike the gfx path there is no CE variant; the control dword carries
 * INDIRECT_BUFFER_VALID plus the IB length and vmid (bits 24+).
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
                                          struct amdgpu_ib *ib,
                                          unsigned vmid, bool ctx_switch)
{
        u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

        amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
                                (2 << 0) |
#endif
                                /* IB base must be 4-byte aligned */
                                (ib->gpu_addr & 0xFFFFFFFC));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        amdgpu_ring_write(ring, control);
}
6176
/*
 * Emit a fence on the gfx ring via EVENT_WRITE_EOP: flush caches, write
 * the fence seq to @addr (32 or 64 bit per flags) and optionally raise
 * an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* EVENT_WRITE_EOP - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        /* DATA_SEL: 2 = send 64bit data, 1 = send 32bit data */
        amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
                          DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));

}
6197
/*
 * Emit a WAIT_REG_MEM that stalls the ring until the fence memory at
 * fence_drv.gpu_addr equals the current sync_seq.  Gfx rings wait on
 * the PFP engine, compute rings on ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
        uint32_t seq = ring->fence_drv.sync_seq;
        uint64_t addr = ring->fence_drv.gpu_addr;

        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
                                 WAIT_REG_MEM_FUNCTION(3) | /* equal */
                                 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
        amdgpu_ring_write(ring, seq);
        amdgpu_ring_write(ring, 0xffffffff);
        amdgpu_ring_write(ring, 4); /* poll interval */
}
6214
/*
 * Emit a VM TLB flush for @vmid and wait until VM_INVALIDATE_REQUEST
 * reads back 0 (mask 0 / func "always" polls until the read completes).
 * On gfx rings a PFP_SYNC_ME is appended so the PFP does not prefetch
 * stale data.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
                                        unsigned vmid, uint64_t pd_addr)
{
        int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

        amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

        /* wait for the invalidate to complete */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
        amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
                                 WAIT_REG_MEM_FUNCTION(0) |  /* always */
                                 WAIT_REG_MEM_ENGINE(0))); /* me */
        amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 0); /* ref */
        amdgpu_ring_write(ring, 0); /* mask */
        amdgpu_ring_write(ring, 0x20); /* poll interval */

        /* compute doesn't have PFP */
        if (usepfp) {
                /* sync PFP to ME, otherwise we might get invalid PFP reads */
                amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
                amdgpu_ring_write(ring, 0x0);
        }
}
6240
6241 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6242 {
6243         return ring->adev->wb.wb[ring->wptr_offs];
6244 }
6245
6246 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6247 {
6248         struct amdgpu_device *adev = ring->adev;
6249
6250         /* XXX check if swapping is necessary on BE */
6251         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6252         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6253 }
6254
6255 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6256                                            bool acquire)
6257 {
6258         struct amdgpu_device *adev = ring->adev;
6259         int pipe_num, tmp, reg;
6260         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6261
6262         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6263
6264         /* first me only has 2 entries, GFX and HP3D */
6265         if (ring->me > 0)
6266                 pipe_num -= 2;
6267
6268         reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6269         tmp = RREG32(reg);
6270         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6271         WREG32(reg, tmp);
6272 }
6273
6274 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6275                                             struct amdgpu_ring *ring,
6276                                             bool acquire)
6277 {
6278         int i, pipe;
6279         bool reserve;
6280         struct amdgpu_ring *iring;
6281
6282         mutex_lock(&adev->gfx.pipe_reserve_mutex);
6283         pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
6284         if (acquire)
6285                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6286         else
6287                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6288
6289         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6290                 /* Clear all reservations - everyone reacquires all resources */
6291                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6292                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6293                                                        true);
6294
6295                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6296                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6297                                                        true);
6298         } else {
6299                 /* Lower all pipes without a current reservation */
6300                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6301                         iring = &adev->gfx.gfx_ring[i];
6302                         pipe = amdgpu_gfx_queue_to_bit(adev,
6303                                                        iring->me,
6304                                                        iring->pipe,
6305                                                        0);
6306                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6307                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6308                 }
6309
6310                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6311                         iring = &adev->gfx.compute_ring[i];
6312                         pipe = amdgpu_gfx_queue_to_bit(adev,
6313                                                        iring->me,
6314                                                        iring->pipe,
6315                                                        0);
6316                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6317                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6318                 }
6319         }
6320
6321         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6322 }
6323
6324 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6325                                       struct amdgpu_ring *ring,
6326                                       bool acquire)
6327 {
6328         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6329         uint32_t queue_priority = acquire ? 0xf : 0x0;
6330
6331         mutex_lock(&adev->srbm_mutex);
6332         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6333
6334         WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6335         WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6336
6337         vi_srbm_select(adev, 0, 0, 0, 0);
6338         mutex_unlock(&adev->srbm_mutex);
6339 }
6340 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6341                                                enum drm_sched_priority priority)
6342 {
6343         struct amdgpu_device *adev = ring->adev;
6344         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6345
6346         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6347                 return;
6348
6349         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6350         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6351 }
6352
/*
 * Emit a fence on a compute ring via RELEASE_MEM: flush caches, write
 * the fence seq to @addr (32 or 64 bit per flags) and optionally raise
 * an interrupt.  Note the dword order differs from the gfx EOP packet:
 * the data/int select comes before the address.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
                                             u64 addr, u64 seq,
                                             unsigned flags)
{
        bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
        bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

        /* RELEASE_MEM - flush caches, send int */
        amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
        amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
                                 EOP_TC_ACTION_EN |
                                 EOP_TC_WB_ACTION_EN |
                                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                                 EVENT_INDEX(5)));
        amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
        amdgpu_ring_write(ring, addr & 0xfffffffc);
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));
        amdgpu_ring_write(ring, upper_32_bits(seq));
}
6373
/*
 * Emit a fence on the KIQ ring: WRITE_DATA of the 32-bit seq to @addr,
 * then (if requested) a write to CPC_INT_STATUS to trigger the
 * interrupt.  64-bit fences are not supported here.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
                                         u64 seq, unsigned int flags)
{
        /* we only allocate 32bit for each seq wb address */
        BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

        /* write fence seq to the "addr" */
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
        amdgpu_ring_write(ring, lower_32_bits(addr));
        amdgpu_ring_write(ring, upper_32_bits(addr));
        amdgpu_ring_write(ring, lower_32_bits(seq));

        if (flags & AMDGPU_FENCE_FLAG_INT) {
                /* set register to trigger INT */
                amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
                amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                                         WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
                amdgpu_ring_write(ring, mmCPC_INT_STATUS);
                amdgpu_ring_write(ring, 0);
                amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
        }
}
6398
/* Emit a SWITCH_BUFFER packet (payload dword is 0). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
        amdgpu_ring_write(ring, 0);
}
6404
/*
 * Emit a CONTEXT_CONTROL packet, building the load-enable bit mask from
 * @flags.  On a context switch the VGT is flushed first and the full set
 * of state-load bits is requested; CE RAM is loaded when a preamble IB
 * is present.  Under SR-IOV, CE metadata is emitted up front.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
        uint32_t dw2 = 0;

        if (amdgpu_sriov_vf(ring->adev))
                gfx_v8_0_ring_emit_ce_meta(ring);

        dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
        if (flags & AMDGPU_HAVE_CTX_SWITCH) {
                gfx_v8_0_ring_emit_vgt_flush(ring);
                /* set load_global_config & load_global_uconfig */
                dw2 |= 0x8001;
                /* set load_cs_sh_regs */
                dw2 |= 0x01000000;
                /* set load_per_context_state & load_gfx_sh_regs for GFX */
                dw2 |= 0x10002;

                /* set load_ce_ram if preamble presented */
                if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
                        dw2 |= 0x10000000;
        } else {
                /* still load_ce_ram if this is the first time preamble presented
                 * although there is no context switch happens.
                 */
                if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
                        dw2 |= 0x10000000;
        }

        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        amdgpu_ring_write(ring, dw2);
        amdgpu_ring_write(ring, 0);
}
6437
/*
 * Emit a COND_EXEC packet whose skip-count dword is left as a
 * placeholder (0x55aa55aa); returns the ring offset of that dword so
 * gfx_v8_0_ring_emit_patch_cond_exec() can patch in the real count.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
        unsigned ret;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
        amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
        amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
        ret = ring->wptr & ring->buf_mask;
        amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
        return ret;
}
6450
/*
 * Patch the COND_EXEC placeholder at @offset with the number of dwords
 * emitted since it, i.e. how many dwords the CP should skip when the
 * condition is false.  The else branch handles the ring-buffer
 * wrap-around case where wptr is now below @offset.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
        unsigned cur;

        BUG_ON(offset > ring->buf_mask);
        BUG_ON(ring->ring[offset] != 0x55aa55aa);

        /* cur = index of the last dword written so far */
        cur = (ring->wptr & ring->buf_mask) - 1;
        if (likely(cur > offset))
                ring->ring[offset] = cur - offset;
        else
                ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}
6464
/*
 * Emit a COPY_DATA packet that copies register @reg into the writeback
 * buffer slot at adev->virt.reg_val_offs, so the host can read the
 * value back from memory.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
        struct amdgpu_device *adev = ring->adev;

        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
        amdgpu_ring_write(ring, 0 |     /* src: register*/
                                (5 << 8) |      /* dst: memory */
                                (1 << 20));     /* write confirm */
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
                                adev->virt.reg_val_offs * 4));
        amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
                                adev->virt.reg_val_offs * 4));
}
6480
6481 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6482                                   uint32_t val)
6483 {
6484         uint32_t cmd;
6485
6486         switch (ring->funcs->type) {
6487         case AMDGPU_RING_TYPE_GFX:
6488                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6489                 break;
6490         case AMDGPU_RING_TYPE_KIQ:
6491                 cmd = 1 << 16; /* no inc addr */
6492                 break;
6493         default:
6494                 cmd = WR_CONFIRM;
6495                 break;
6496         }
6497
6498         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6499         amdgpu_ring_write(ring, cmd);
6500         amdgpu_ring_write(ring, reg);
6501         amdgpu_ring_write(ring, 0);
6502         amdgpu_ring_write(ring, val);
6503 }
6504
6505 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6506 {
6507         struct amdgpu_device *adev = ring->adev;
6508         uint32_t value = 0;
6509
6510         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6511         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6512         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6513         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6514         WREG32(mmSQ_CMD, value);
6515 }
6516
/* Enable/disable the gfx ring EOP (timestamp) interrupt. */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
                                                 enum amdgpu_interrupt_state state)
{
        WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6523
6524 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6525                                                      int me, int pipe,
6526                                                      enum amdgpu_interrupt_state state)
6527 {
6528         u32 mec_int_cntl, mec_int_cntl_reg;
6529
6530         /*
6531          * amdgpu controls only the first MEC. That's why this function only
6532          * handles the setting of interrupts for this specific MEC. All other
6533          * pipes' interrupts are set by amdkfd.
6534          */
6535
6536         if (me == 1) {
6537                 switch (pipe) {
6538                 case 0:
6539                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6540                         break;
6541                 case 1:
6542                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6543                         break;
6544                 case 2:
6545                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6546                         break;
6547                 case 3:
6548                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6549                         break;
6550                 default:
6551                         DRM_DEBUG("invalid pipe %d\n", pipe);
6552                         return;
6553                 }
6554         } else {
6555                 DRM_DEBUG("invalid me %d\n", me);
6556                 return;
6557         }
6558
6559         switch (state) {
6560         case AMDGPU_IRQ_STATE_DISABLE:
6561                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6562                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6563                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6564                 break;
6565         case AMDGPU_IRQ_STATE_ENABLE:
6566                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6567                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6568                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6569                 break;
6570         default:
6571                 break;
6572         }
6573 }
6574
/* Enable/disable the privileged-register-access fault interrupt. */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
                                             struct amdgpu_irq_src *source,
                                             unsigned type,
                                             enum amdgpu_interrupt_state state)
{
        WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

        return 0;
}
6585
/* Enable/disable the privileged-instruction fault interrupt. */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
                                              struct amdgpu_irq_src *source,
                                              unsigned type,
                                              enum amdgpu_interrupt_state state)
{
        WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
                     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

        return 0;
}
6596
/*
 * Dispatch an EOP interrupt state change to the right engine: the gfx
 * ring, or a specific (me, pipe) pair for the compute MECs.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
                                            struct amdgpu_irq_src *src,
                                            unsigned type,
                                            enum amdgpu_interrupt_state state)
{
        switch (type) {
        case AMDGPU_CP_IRQ_GFX_EOP:
                gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
                break;
        case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
                gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
                break;
        default:
                /* unknown types are silently ignored */
                break;
        }
        return 0;
}
6635
6636 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6637                                          struct amdgpu_irq_src *source,
6638                                          unsigned int type,
6639                                          enum amdgpu_interrupt_state state)
6640 {
6641         int enable_flag;
6642
6643         switch (state) {
6644         case AMDGPU_IRQ_STATE_DISABLE:
6645                 enable_flag = 0;
6646                 break;
6647
6648         case AMDGPU_IRQ_STATE_ENABLE:
6649                 enable_flag = 1;
6650                 break;
6651
6652         default:
6653                 return -EINVAL;
6654         }
6655
6656         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6657         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6658         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6659         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6660         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6661         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6662                      enable_flag);
6663         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6664                      enable_flag);
6665         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6666                      enable_flag);
6667         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6668                      enable_flag);
6669         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6670                      enable_flag);
6671         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6672                      enable_flag);
6673         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6674                      enable_flag);
6675         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6676                      enable_flag);
6677
6678         return 0;
6679 }
6680
6681 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
6682                                      struct amdgpu_irq_src *source,
6683                                      unsigned int type,
6684                                      enum amdgpu_interrupt_state state)
6685 {
6686         int enable_flag;
6687
6688         switch (state) {
6689         case AMDGPU_IRQ_STATE_DISABLE:
6690                 enable_flag = 1;
6691                 break;
6692
6693         case AMDGPU_IRQ_STATE_ENABLE:
6694                 enable_flag = 0;
6695                 break;
6696
6697         default:
6698                 return -EINVAL;
6699         }
6700
6701         WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
6702                      enable_flag);
6703
6704         return 0;
6705 }
6706
/*
 * EOP interrupt handler: decode (me, pipe, queue) from the IV ring_id
 * and process the fence on the matching ring.  me 0 is the single gfx
 * ring; me 1/2 are the compute MECs.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
                            struct amdgpu_irq_src *source,
                            struct amdgpu_iv_entry *entry)
{
        int i;
        u8 me_id, pipe_id, queue_id;
        struct amdgpu_ring *ring;

        DRM_DEBUG("IH: CP EOP\n");
        /* ring_id layout: [6:4] queue, [3:2] me, [1:0] pipe */
        me_id = (entry->ring_id & 0x0c) >> 2;
        pipe_id = (entry->ring_id & 0x03) >> 0;
        queue_id = (entry->ring_id & 0x70) >> 4;

        switch (me_id) {
        case 0:
                amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
                break;
        case 1:
        case 2:
                for (i = 0; i < adev->gfx.num_compute_rings; i++) {
                        ring = &adev->gfx.compute_ring[i];
                        /* Per-queue interrupt is supported for MEC starting from VI.
                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
                          */
                        if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
                                amdgpu_fence_process(ring);
                }
                break;
        }
        return 0;
}
6738
/* Privileged-register fault handler: log and schedule a GPU reset. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
                                 struct amdgpu_irq_src *source,
                                 struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal register access in command stream\n");
        schedule_work(&adev->reset_work);
        return 0;
}
6747
/* Privileged-instruction fault handler: log and schedule a GPU reset. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
                                  struct amdgpu_irq_src *source,
                                  struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("Illegal instruction in command stream\n");
        schedule_work(&adev->reset_work);
        return 0;
}
6756
/*
 * CP ECC/EDC error interrupt handler: currently only logs the event.
 *
 * Fix: terminate the printk message with '\n' so it is flushed as its
 * own log line instead of being merged with the next message.
 */
static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
                                     struct amdgpu_irq_src *source,
                                     struct amdgpu_iv_entry *entry)
{
        DRM_ERROR("CP EDC/ECC error detected.\n");
        return 0;
}
6764
6765 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
6766 {
6767         u32 enc, se_id, sh_id, cu_id;
6768         char type[20];
6769         int sq_edc_source = -1;
6770
6771         enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
6772         se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);
6773
6774         switch (enc) {
6775                 case 0:
6776                         DRM_INFO("SQ general purpose intr detected:"
6777                                         "se_id %d, immed_overflow %d, host_reg_overflow %d,"
6778                                         "host_cmd_overflow %d, cmd_timestamp %d,"
6779                                         "reg_timestamp %d, thread_trace_buff_full %d,"
6780                                         "wlt %d, thread_trace %d.\n",
6781                                         se_id,
6782                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
6783                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
6784                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
6785                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
6786                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
6787                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
6788                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
6789                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
6790                                         );
6791                         break;
6792                 case 1:
6793                 case 2:
6794
6795                         cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
6796                         sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);
6797
6798                         /*
6799                          * This function can be called either directly from ISR
6800                          * or from BH in which case we can access SQ_EDC_INFO
6801                          * instance
6802                          */
6803                         if (in_task()) {
6804                                 mutex_lock(&adev->grbm_idx_mutex);
6805                                 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
6806
6807                                 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
6808
6809                                 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6810                                 mutex_unlock(&adev->grbm_idx_mutex);
6811                         }
6812
6813                         if (enc == 1)
6814                                 sprintf(type, "instruction intr");
6815                         else
6816                                 sprintf(type, "EDC/ECC error");
6817
6818                         DRM_INFO(
6819                                 "SQ %s detected: "
6820                                         "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
6821                                         "trap %s, sq_ed_info.source %s.\n",
6822                                         type, se_id, sh_id, cu_id,
6823                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
6824                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
6825                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
6826                                         REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
6827                                         (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
6828                                 );
6829                         break;
6830                 default:
6831                         DRM_ERROR("SQ invalid encoding type\n.");
6832         }
6833 }
6834
6835 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
6836 {
6837
6838         struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
6839         struct sq_work *sq_work = container_of(work, struct sq_work, work);
6840
6841         gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
6842 }
6843
6844 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
6845                            struct amdgpu_irq_src *source,
6846                            struct amdgpu_iv_entry *entry)
6847 {
6848         unsigned ih_data = entry->src_data[0];
6849
6850         /*
6851          * Try to submit work so SQ_EDC_INFO can be accessed from
6852          * BH. If previous work submission hasn't finished yet
6853          * just print whatever info is possible directly from the ISR.
6854          */
6855         if (work_pending(&adev->gfx.sq_work.work)) {
6856                 gfx_v8_0_parse_sq_irq(adev, ih_data);
6857         } else {
6858                 adev->gfx.sq_work.ih_data = ih_data;
6859                 schedule_work(&adev->gfx.sq_work.work);
6860         }
6861
6862         return 0;
6863 }
6864
/* IP block callbacks: hooks the generic amdgpu IP lifecycle (sw/hw init
 * and fini, suspend/resume, idle checks, soft reset and clock/power
 * gating) up to the GFX 8.0 implementations in this file.
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
6885
/* Ring callbacks for the GFX (graphics) ring.  emit_frame_size is the
 * worst-case dword count reserved per submission; the per-item comments
 * below account for each packet group.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
			   prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
};
6930
/* Ring callbacks for the compute (MEC) rings.  Shares most emit helpers
 * with the gfx ring but uses the compute wptr/fence paths and supports
 * per-ring priority.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6960
/* Ring callbacks for the KIQ (kernel interface queue).  Like a compute
 * ring but with its own fence path and register read/write emission; no
 * VM flush / GDS switch callbacks are installed here.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
6986
6987 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6988 {
6989         int i;
6990
6991         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
6992
6993         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6994                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6995
6996         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6997                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6998 }
6999
/* EOP (end-of-pipe) interrupt source: state control + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
7004
/* Privileged register fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
7009
/* Privileged instruction fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
7014
/* CP ECC error interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};
7019
/* SQ interrupt source; gfx_v8_0_sq_irq defers parsing to a work item. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};
7024
/* Wire up type counts and callback tables for all GFX interrupt sources. */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	/* one EOP interrupt type per CP ring/pipe */
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}
7042
/* Install the RLC callback table.  NOTE(review): the shared VI-family
 * table carries the "iceland" name — presumably first written for that
 * part; confirm it covers all GFX8 variants handled by this file.
 */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7047
/* Initialize GDS (global data share) totals and the static partition
 * sizes handed to the gfx and CS (compute) clients.  The memory total
 * is read from hardware; GWS/OA totals are fixed for GFX8.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC GDS info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		/* parts reporting 64KB of GDS memory */
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		/* all other GDS sizes */
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
7075
7076 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7077                                                  u32 bitmap)
7078 {
7079         u32 data;
7080
7081         if (!bitmap)
7082                 return;
7083
7084         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7085         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7086
7087         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7088 }
7089
7090 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7091 {
7092         u32 data, mask;
7093
7094         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7095                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7096
7097         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7098
7099         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7100 }
7101
/* Populate adev->gfx.cu_info: per-SE/SH active-CU bitmaps, the total
 * active CU count, the "always on" (AO) CU bitmaps/mask, and fixed
 * per-CU limits for GFX8.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	/* user CU-disable masks, sized for at most 4 SEs x 2 SHs */
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	/* APUs keep only 2 CUs per SH always-on; dGPUs keep them all */
	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* point GRBM at this SE/SH so CU registers apply to it */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			/* disable_masks only covers the 4x2 grid parsed above */
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; the first ao_cu_num become always-on */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* ao_cu_mask packs 8 bits per SH; only 2 SEs x 2 SHs fit in 32 bits */
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	/* restore broadcast to all SEs/SHs */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}
7157
/* GFX IP v8.0 block descriptor; shares the callback table with v8.1. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7166
/* GFX IP v8.1 block descriptor; same callbacks as v8.0, minor rev 1. */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7175
7176 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
7177 {
7178         uint64_t ce_payload_addr;
7179         int cnt_ce;
7180         union {
7181                 struct vi_ce_ib_state regular;
7182                 struct vi_ce_ib_state_chained_ib chained;
7183         } ce_payload = {};
7184
7185         if (ring->adev->virt.chained_ib_support) {
7186                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7187                         offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
7188                 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
7189         } else {
7190                 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
7191                         offsetof(struct vi_gfx_meta_data, ce_payload);
7192                 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
7193         }
7194
7195         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
7196         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
7197                                 WRITE_DATA_DST_SEL(8) |
7198                                 WR_CONFIRM) |
7199                                 WRITE_DATA_CACHE_POLICY(0));
7200         amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
7201         amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
7202         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
7203 }
7204
7205 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
7206 {
7207         uint64_t de_payload_addr, gds_addr, csa_addr;
7208         int cnt_de;
7209         union {
7210                 struct vi_de_ib_state regular;
7211                 struct vi_de_ib_state_chained_ib chained;
7212         } de_payload = {};
7213
7214         csa_addr = amdgpu_csa_vaddr(ring->adev);
7215         gds_addr = csa_addr + 4096;
7216         if (ring->adev->virt.chained_ib_support) {
7217                 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
7218                 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
7219                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
7220                 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
7221         } else {
7222                 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
7223                 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
7224                 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
7225                 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
7226         }
7227
7228         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
7229         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
7230                                 WRITE_DATA_DST_SEL(8) |
7231                                 WR_CONFIRM) |
7232                                 WRITE_DATA_CACHE_POLICY(0));
7233         amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
7234         amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
7235         amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
7236 }