/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#include "ivsrcid/ivsrcid_vislands30.h"

#define GFX8_NUM_GFX_RINGS     1
#define GFX8_MEC_HPD_SIZE 4096

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address */
enum {
        BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
        BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
        BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
        BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
        BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
        BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength        14

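/*
 * Firmware binaries requested at driver init for each supported VI ASIC.
 * The "_2" variants are the newer Polaris firmware packagings; the loader
 * below falls back to the unsuffixed names when a "_2" file is absent.
 */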
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");

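/*
 * Per-VMID GDS register offsets: {base, size, GWS, OA} for VMIDs 0-15.
 */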
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
        {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
        {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
        {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
        {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
        {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
        {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
        {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
        {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
        {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
        {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
        {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
        {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
        {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
        {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
        {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
        {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

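/*
 * The "golden" tables below are flat arrays of {register offset, bitmask,
 * value} triples consumed by amdgpu_device_program_register_sequence():
 * the bits selected by the mask are replaced with the corresponding bits
 * of the value (a mask of 0xffffffff writes the value directly).
 */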
static const u32 golden_settings_tonga_a11[] =
{
        mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_vegam_a11[] =
{
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 vegam_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris11_a11[] =
{
        mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x01180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
        mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
        mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
        mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
        mmSQ_CONFIG, 0x07f80000, 0x07180000,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
        mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
        mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmDB_DEBUG3, 0xc0000000, 0xc0000000,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
        mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
        mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
        mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
        mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
        mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
        mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
        mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
        mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
        mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
        mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
        mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
        mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
        mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
        mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
        mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
        mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
        mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
        mmDB_DEBUG2, 0xf00fffff, 0x00000400,
        mmGB_GPU_ID, 0x0000000f, 0x00000000,
        mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
        mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
        mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
        mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
        mmTCC_CTRL, 0x00100000, 0xf31fff7f,
        mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
        mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
        mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
        mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
        mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
        mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
        mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
        mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
        mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
        mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};


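/*
 * Human-readable descriptions of SQ EDC error sources, indexed by the
 * source field reported with an SQ EDC interrupt.
 */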
static const char * const sq_edc_source_names[] = {
        "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
        "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
        "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
        "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
        "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
        "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
        "SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

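/*
 * gfx_v8_0_init_golden_registers - program the per-ASIC "golden" settings
 *
 * Applies the clockgating init, golden settings, and common golden
 * register sequences matching adev->asic_type.  Polaris10 additionally
 * gets an SMC clock setup write and, on a few specific board SKUs,
 * extra I2C transactions (see the quirk below).
 */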
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_mgcg_cgcg_init,
                                                        ARRAY_SIZE(iceland_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_iceland_a11,
                                                        ARRAY_SIZE(golden_settings_iceland_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        iceland_golden_common_all,
                                                        ARRAY_SIZE(iceland_golden_common_all));
                break;
        case CHIP_FIJI:
                amdgpu_device_program_register_sequence(adev,
                                                        fiji_mgcg_cgcg_init,
                                                        ARRAY_SIZE(fiji_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_fiji_a10,
                                                        ARRAY_SIZE(golden_settings_fiji_a10));
                amdgpu_device_program_register_sequence(adev,
                                                        fiji_golden_common_all,
                                                        ARRAY_SIZE(fiji_golden_common_all));
                break;

        case CHIP_TONGA:
                amdgpu_device_program_register_sequence(adev,
                                                        tonga_mgcg_cgcg_init,
                                                        ARRAY_SIZE(tonga_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_tonga_a11,
                                                        ARRAY_SIZE(golden_settings_tonga_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        tonga_golden_common_all,
                                                        ARRAY_SIZE(tonga_golden_common_all));
                break;
        case CHIP_VEGAM:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_vegam_a11,
                                                        ARRAY_SIZE(golden_settings_vegam_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        vegam_golden_common_all,
                                                        ARRAY_SIZE(vegam_golden_common_all));
                break;
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_polaris11_a11,
                                                        ARRAY_SIZE(golden_settings_polaris11_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        polaris11_golden_common_all,
                                                        ARRAY_SIZE(polaris11_golden_common_all));
                break;
        case CHIP_POLARIS10:
                amdgpu_device_program_register_sequence(adev,
                                                        golden_settings_polaris10_a11,
                                                        ARRAY_SIZE(golden_settings_polaris10_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        polaris10_golden_common_all,
                                                        ARRAY_SIZE(polaris10_golden_common_all));
                WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
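                /*
                 * Board-specific quirk: a few Polaris10 SKUs (matched by
                 * PCI revision and subsystem IDs) need these extra I2C
                 * transactions at init, presumably to reconfigure an
                 * onboard regulator over the 0x96 I2C target.
                 */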
                if (adev->pdev->revision == 0xc7 &&
                    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
                     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
                     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
                        amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
                }
                break;
        case CHIP_CARRIZO:
                amdgpu_device_program_register_sequence(adev,
                                                        cz_mgcg_cgcg_init,
                                                        ARRAY_SIZE(cz_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        cz_golden_settings_a11,
                                                        ARRAY_SIZE(cz_golden_settings_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        cz_golden_common_all,
                                                        ARRAY_SIZE(cz_golden_common_all));
                break;
        case CHIP_STONEY:
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_mgcg_cgcg_init,
                                                        ARRAY_SIZE(stoney_mgcg_cgcg_init));
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_golden_settings_a11,
                                                        ARRAY_SIZE(stoney_golden_settings_a11));
                amdgpu_device_program_register_sequence(adev,
                                                        stoney_golden_common_all,
                                                        ARRAY_SIZE(stoney_golden_common_all));
                break;
        default:
                break;
        }
}

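/*
 * gfx_v8_0_scratch_init - set up the pool of CP scratch registers
 *
 * Eight scratch registers starting at mmSCRATCH_REG0 are made available
 * through the amdgpu_gfx_scratch_get()/free() allocator; free_mask
 * starts with all eight registers free.
 */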
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
        adev->gfx.scratch.num_reg = 8;
        adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

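/*
 * gfx_v8_0_ring_test_ring - basic ring liveness test
 *
 * Seeds a scratch register with 0xCAFEDEAD, emits a SET_UCONFIG_REG
 * packet that writes 0xDEADBEEF to it, then busy-waits (up to
 * adev->usec_timeout microseconds) for the value to land.  Returns 0 on
 * success or -ETIMEDOUT if the write never shows up.
 */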
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        uint32_t scratch;
        uint32_t tmp = 0;
        unsigned i;
        int r;

        r = amdgpu_gfx_scratch_get(adev, &scratch);
        if (r)
                return r;

        WREG32(scratch, 0xCAFEDEAD);
        r = amdgpu_ring_alloc(ring, 3);
        if (r)
                goto error_free_scratch;

        amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
        amdgpu_ring_write(ring, 0xDEADBEEF);
        amdgpu_ring_commit(ring);

        for (i = 0; i < adev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                        break;
                udelay(1);
        }

        if (i >= adev->usec_timeout)
                r = -ETIMEDOUT;

error_free_scratch:
        amdgpu_gfx_scratch_free(adev, scratch);
        return r;
}

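/*
 * gfx_v8_0_ring_test_ib - test indirect buffer submission
 *
 * Allocates a writeback slot seeded with 0xCAFEDEAD, submits a small IB
 * containing a WRITE_DATA packet that stores 0xDEADBEEF to that slot,
 * and waits on the resulting fence.  Returns 0 if the value arrives,
 * -ETIMEDOUT on fence timeout, or -EINVAL if the readback mismatches.
 */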
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib ib;
        struct dma_fence *f = NULL;

        unsigned int index;
        uint64_t gpu_addr;
        uint32_t tmp;
        long r;

        r = amdgpu_device_wb_get(adev, &index);
        if (r)
                return r;

        gpu_addr = adev->wb.gpu_addr + (index * 4);
        adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
        memset(&ib, 0, sizeof(ib));
        r = amdgpu_ib_get(adev, NULL, 16, &ib);
        if (r)
                goto err1;

        ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
        ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
        ib.ptr[2] = lower_32_bits(gpu_addr);
        ib.ptr[3] = upper_32_bits(gpu_addr);
        ib.ptr[4] = 0xDEADBEEF;
        ib.length_dw = 5;

        r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
        if (r)
                goto err2;

        r = dma_fence_wait_timeout(f, false, timeout);
        if (r == 0) {
                r = -ETIMEDOUT;
                goto err2;
        } else if (r < 0) {
                goto err2;
        }

        tmp = adev->wb.wb[index];
        if (tmp == 0xDEADBEEF)
                r = 0;
        else
                r = -EINVAL;

err2:
        amdgpu_ib_free(adev, &ib, NULL);
        dma_fence_put(f);
err1:
        amdgpu_device_wb_free(adev, index);
        return r;
}


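/*
 * gfx_v8_0_free_microcode - release all CP/RLC firmware images
 *
 * MEC2 firmware is only released on ASICs that have it (Stoney and
 * Topaz do not), mirroring the request logic in gfx_v8_0_init_microcode().
 */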
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
        release_firmware(adev->gfx.pfp_fw);
        adev->gfx.pfp_fw = NULL;
        release_firmware(adev->gfx.me_fw);
        adev->gfx.me_fw = NULL;
        release_firmware(adev->gfx.ce_fw);
        adev->gfx.ce_fw = NULL;
        release_firmware(adev->gfx.rlc_fw);
        adev->gfx.rlc_fw = NULL;
        release_firmware(adev->gfx.mec_fw);
        adev->gfx.mec_fw = NULL;
        if ((adev->asic_type != CHIP_STONEY) &&
            (adev->asic_type != CHIP_TOPAZ))
                release_firmware(adev->gfx.mec2_fw);
        adev->gfx.mec2_fw = NULL;

        kfree(adev->gfx.rlc.register_list_format);
}

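/*
 * gfx_v8_0_init_microcode - fetch and validate CP/RLC firmware
 *
 * Requests the PFP, ME, CE, RLC, MEC and (where present) MEC2 images for
 * the current ASIC.  Polaris parts first try the "_2" firmware packaging
 * and fall back to the legacy names on -ENOENT.  Version and feature
 * fields are parsed from the firmware headers, and the ucode table
 * entries are filled in for the common firmware loading path.
 */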
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
        const char *chip_name;
        char fw_name[30];
        int err;
        struct amdgpu_firmware_info *info = NULL;
        const struct common_firmware_header *header = NULL;
        const struct gfx_firmware_header_v1_0 *cp_hdr;
        const struct rlc_firmware_header_v2_0 *rlc_hdr;
        unsigned int *tmp = NULL, i;

        DRM_DEBUG("\n");

        switch (adev->asic_type) {
        case CHIP_TOPAZ:
                chip_name = "topaz";
                break;
        case CHIP_TONGA:
                chip_name = "tonga";
                break;
        case CHIP_CARRIZO:
                chip_name = "carrizo";
                break;
        case CHIP_FIJI:
                chip_name = "fiji";
                break;
        case CHIP_STONEY:
                chip_name = "stoney";
                break;
        case CHIP_POLARIS10:
                chip_name = "polaris10";
                break;
        case CHIP_POLARIS11:
                chip_name = "polaris11";
                break;
        case CHIP_POLARIS12:
                chip_name = "polaris12";
                break;
        case CHIP_VEGAM:
                chip_name = "vegam";
                break;
        default:
                BUG();
        }

        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
                err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
                if (err == -ENOENT) {
                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
                        err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
                }
        } else {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
                err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
        }
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
        adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
                err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
                if (err == -ENOENT) {
                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
                        err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
                }
        } else {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
                err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
        }
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.me_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
        adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);

        adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
                err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
                if (err == -ENOENT) {
                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
                        err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
                }
        } else {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
                err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
        }
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.ce_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
        adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        /*
         * Support for MCBP/Virtualization in combination with chained IBs
         * was formally released with feature version #46.
         */
        if (adev->gfx.ce_feature_version >= 46 &&
            adev->gfx.pfp_feature_version >= 46) {
                adev->virt.chained_ib_support = true;
                DRM_INFO("Chained IB support enabled!\n");
        } else
                adev->virt.chained_ib_support = false;

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
        err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
        adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
        adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

        adev->gfx.rlc.save_and_restore_offset =
                        le32_to_cpu(rlc_hdr->save_and_restore_offset);
        adev->gfx.rlc.clear_state_descriptor_offset =
                        le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
        adev->gfx.rlc.avail_scratch_ram_locations =
                        le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
        adev->gfx.rlc.reg_restore_list_size =
                        le32_to_cpu(rlc_hdr->reg_restore_list_size);
        adev->gfx.rlc.reg_list_format_start =
                        le32_to_cpu(rlc_hdr->reg_list_format_start);
        adev->gfx.rlc.reg_list_format_separate_start =
                        le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
        adev->gfx.rlc.starting_offsets_start =
                        le32_to_cpu(rlc_hdr->starting_offsets_start);
        adev->gfx.rlc.reg_list_format_size_bytes =
                        le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
        adev->gfx.rlc.reg_list_size_bytes =
                        le32_to_cpu(rlc_hdr->reg_list_size_bytes);

        adev->gfx.rlc.register_list_format =
                        kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
                                        adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

        if (!adev->gfx.rlc.register_list_format) {
                err = -ENOMEM;
                goto out;
        }

        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
                        le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
        for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
                adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

        adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

        tmp = (unsigned int *)((uintptr_t)rlc_hdr +
                        le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
        for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
                adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

        if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
                err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
                if (err == -ENOENT) {
                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
                        err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
                }
        } else {
                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
                err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
        }
        if (err)
                goto out;
        err = amdgpu_ucode_validate(adev->gfx.mec_fw);
        if (err)
                goto out;
        cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
        adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
        adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

        if ((adev->asic_type != CHIP_STONEY) &&
            (adev->asic_type != CHIP_TOPAZ)) {
                if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
                        err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
                        if (err == -ENOENT) {
                                snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
                                err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
                        }
                } else {
                        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
                        err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
                }
                if (!err) {
                        err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
                        if (err)
                                goto out;
                        cp_hdr = (const struct gfx_firmware_header_v1_0 *)
                                adev->gfx.mec2_fw->data;
                        adev->gfx.mec2_fw_version =
                                le32_to_cpu(cp_hdr->header.ucode_version);
                        adev->gfx.mec2_feature_version =
                                le32_to_cpu(cp_hdr->ucode_feature_version);
                } else {
                        err = 0;
                        adev->gfx.mec2_fw = NULL;
                }
        }

        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
        info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
        info->fw = adev->gfx.pfp_fw;
        header = (const struct common_firmware_header *)info->fw->data;
        adev->firmware.fw_size +=
                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
        info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
        info->fw = adev->gfx.me_fw;
        header = (const struct common_firmware_header *)info->fw->data;
        adev->firmware.fw_size +=
                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
        info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
        info->fw = adev->gfx.ce_fw;
        header = (const struct common_firmware_header *)info->fw->data;
        adev->firmware.fw_size +=
                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
        info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
        info->fw = adev->gfx.rlc_fw;
        header = (const struct common_firmware_header *)info->fw->data;
        adev->firmware.fw_size +=
                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

        info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
        info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
        info->fw = adev->gfx.mec_fw;
        header = (const struct common_firmware_header *)info->fw->data;
        adev->firmware.fw_size +=
                ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

1199         /* we also need to account for the CP jump table (JT) */
1200         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1201         adev->firmware.fw_size +=
1202                 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1203
1204         if (amdgpu_sriov_vf(adev)) {
1205                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1206                 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1207                 info->fw = adev->gfx.mec_fw;
1208                 adev->firmware.fw_size +=
1209                         ALIGN(64 * PAGE_SIZE, PAGE_SIZE);
1210         }
1211
1212         if (adev->gfx.mec2_fw) {
1213                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1214                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1215                 info->fw = adev->gfx.mec2_fw;
1216                 header = (const struct common_firmware_header *)info->fw->data;
1217                 adev->firmware.fw_size +=
1218                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1219         }
1220
1221 out:
1222         if (err) {
1223                 dev_err(adev->dev,
1224                         "gfx8: Failed to load firmware \"%s\"\n",
1225                         fw_name);
1226                 release_firmware(adev->gfx.pfp_fw);
1227                 adev->gfx.pfp_fw = NULL;
1228                 release_firmware(adev->gfx.me_fw);
1229                 adev->gfx.me_fw = NULL;
1230                 release_firmware(adev->gfx.ce_fw);
1231                 adev->gfx.ce_fw = NULL;
1232                 release_firmware(adev->gfx.rlc_fw);
1233                 adev->gfx.rlc_fw = NULL;
1234                 release_firmware(adev->gfx.mec_fw);
1235                 adev->gfx.mec_fw = NULL;
1236                 release_firmware(adev->gfx.mec2_fw);
1237                 adev->gfx.mec2_fw = NULL;
1238         }
1239         return err;
1240 }
1241
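/*
 * Build the clear state buffer (CSB) that the RLC replays to reset the
 * context registers to their golden values: a PREAMBLE begin marker,
 * SET_CONTEXT_REG writes for every SECT_CONTEXT extent of the vi_cs_data
 * tables, the board-specific raster config, a PREAMBLE end marker and a
 * trailing CLEAR_STATE packet.
 */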
1242 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1243                                     volatile u32 *buffer)
1244 {
1245         u32 count = 0, i;
1246         const struct cs_section_def *sect = NULL;
1247         const struct cs_extent_def *ext = NULL;
1248
1249         if (adev->gfx.rlc.cs_data == NULL)
1250                 return;
1251         if (buffer == NULL)
1252                 return;
1253
1254         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1255         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1256
1257         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1258         buffer[count++] = cpu_to_le32(0x80000000);
1259         buffer[count++] = cpu_to_le32(0x80000000);
1260
1261         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1262                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1263                         if (sect->id == SECT_CONTEXT) {
1264                                 buffer[count++] =
1265                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1266                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1267                                                 PACKET3_SET_CONTEXT_REG_START);
1268                                 for (i = 0; i < ext->reg_count; i++)
1269                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1270                         } else {
1271                                 return;
1272                         }
1273                 }
1274         }
1275
1276         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1277         buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1278                         PACKET3_SET_CONTEXT_REG_START);
1279         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1280         buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1281
1282         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1283         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1284
1285         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1286         buffer[count++] = cpu_to_le32(0);
1287 }
1288
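/*
 * Number of CP jump tables packed into the RLC cp_table; Carrizo carries
 * one more than the other parts (presumably the extra MEC2 table).
 */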
1289 static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
1290 {
1291         if (adev->asic_type == CHIP_CARRIZO)
1292                 return 5;
1293         else
1294                 return 4;
1295 }
1296
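/*
 * Set up the RLC objects: the clear state buffer described above and, on
 * the APUs (Carrizo/Stoney), a CP table sized for the jump tables plus
 * 64KB of GDS backup space.
 */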
1297 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1298 {
1299         const struct cs_section_def *cs_data;
1300         int r;
1301
1302         adev->gfx.rlc.cs_data = vi_cs_data;
1303
1304         cs_data = adev->gfx.rlc.cs_data;
1305
1306         if (cs_data) {
1307                 /* init clear state block */
1308                 r = amdgpu_gfx_rlc_init_csb(adev);
1309                 if (r)
1310                         return r;
1311         }
1312
1313         if ((adev->asic_type == CHIP_CARRIZO) ||
1314             (adev->asic_type == CHIP_STONEY)) {
1315                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1316                 r = amdgpu_gfx_rlc_init_cpt(adev);
1317                 if (r)
1318                         return r;
1319         }
1320
1321         return 0;
1322 }
1323
1324 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1325 {
1326         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1327 }
1328
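/*
 * Allocate and zero the MEC HPD/EOP buffer in VRAM: GFX8_MEC_HPD_SIZE
 * bytes for each compute ring acquired from the queue bitmap.
 */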
1329 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1330 {
1331         int r;
1332         u32 *hpd;
1333         size_t mec_hpd_size;
1334
1335         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1336
1337         /* take ownership of the relevant compute queues */
1338         amdgpu_gfx_compute_queue_acquire(adev);
1339
1340         mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
1341
1342         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1343                                       AMDGPU_GEM_DOMAIN_VRAM,
1344                                       &adev->gfx.mec.hpd_eop_obj,
1345                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1346                                       (void **)&hpd);
1347         if (r) {
1348                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1349                 return r;
1350         }
1351
1352         memset(hpd, 0, mec_hpd_size);
1353
1354         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1355         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1356
1357         return 0;
1358 }
1359
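/*
 * Hand-assembled GCN (VI) machine code.  The words below are v_mov_b32
 * encodings that sweep a pattern through the VGPR file, terminated by
 * s_barrier (0xbf8a0000) and s_endpgm (0xbf810000), so every VGPR gets
 * written before EDC error reporting is enabled.
 */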
1360 static const u32 vgpr_init_compute_shader[] =
1361 {
1362         0x7e000209, 0x7e020208,
1363         0x7e040207, 0x7e060206,
1364         0x7e080205, 0x7e0a0204,
1365         0x7e0c0203, 0x7e0e0202,
1366         0x7e100201, 0x7e120200,
1367         0x7e140209, 0x7e160208,
1368         0x7e180207, 0x7e1a0206,
1369         0x7e1c0205, 0x7e1e0204,
1370         0x7e200203, 0x7e220202,
1371         0x7e240201, 0x7e260200,
1372         0x7e280209, 0x7e2a0208,
1373         0x7e2c0207, 0x7e2e0206,
1374         0x7e300205, 0x7e320204,
1375         0x7e340203, 0x7e360202,
1376         0x7e380201, 0x7e3a0200,
1377         0x7e3c0209, 0x7e3e0208,
1378         0x7e400207, 0x7e420206,
1379         0x7e440205, 0x7e460204,
1380         0x7e480203, 0x7e4a0202,
1381         0x7e4c0201, 0x7e4e0200,
1382         0x7e500209, 0x7e520208,
1383         0x7e540207, 0x7e560206,
1384         0x7e580205, 0x7e5a0204,
1385         0x7e5c0203, 0x7e5e0202,
1386         0x7e600201, 0x7e620200,
1387         0x7e640209, 0x7e660208,
1388         0x7e680207, 0x7e6a0206,
1389         0x7e6c0205, 0x7e6e0204,
1390         0x7e700203, 0x7e720202,
1391         0x7e740201, 0x7e760200,
1392         0x7e780209, 0x7e7a0208,
1393         0x7e7c0207, 0x7e7e0206,
1394         0xbf8a0000, 0xbf810000,
1395 };
1396
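/*
 * Counterpart for the SGPR file: scalar-move encodings that walk the
 * SGPRs, again ending with s_barrier and s_endpgm.
 */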
1397 static const u32 sgpr_init_compute_shader[] =
1398 {
1399         0xbe8a0100, 0xbe8c0102,
1400         0xbe8e0104, 0xbe900106,
1401         0xbe920108, 0xbe940100,
1402         0xbe960102, 0xbe980104,
1403         0xbe9a0106, 0xbe9c0108,
1404         0xbe9e0100, 0xbea00102,
1405         0xbea20104, 0xbea40106,
1406         0xbea60108, 0xbea80100,
1407         0xbeaa0102, 0xbeac0104,
1408         0xbeae0106, 0xbeb00108,
1409         0xbeb20100, 0xbeb40102,
1410         0xbeb60104, 0xbeb80106,
1411         0xbeba0108, 0xbebc0100,
1412         0xbebe0102, 0xbec00104,
1413         0xbec20106, 0xbec40108,
1414         0xbec60100, 0xbec80102,
1415         0xbee60004, 0xbee70005,
1416         0xbeea0006, 0xbeeb0007,
1417         0xbee80008, 0xbee90009,
1418         0xbefc0000, 0xbf8a0000,
1419         0xbf810000, 0x00000000,
1420 };
1421
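/*
 * Register/value pairs, consumed two at a time by the IB builder below
 * (one PACKET3_SET_SH_REG write per pair).  They configure the thread
 * counts and the GPR allocation (COMPUTE_PGM_RSRC1/2) for each init
 * shader dispatch.
 */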
1422 static const u32 vgpr_init_regs[] =
1423 {
1424         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1425         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1426         mmCOMPUTE_NUM_THREAD_X, 256*4,
1427         mmCOMPUTE_NUM_THREAD_Y, 1,
1428         mmCOMPUTE_NUM_THREAD_Z, 1,
1429         mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
1430         mmCOMPUTE_PGM_RSRC2, 20,
1431         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1432         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1433         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1434         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1435         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1436         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1437         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1438         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1439         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1440         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1441 };
1442
1443 static const u32 sgpr1_init_regs[] =
1444 {
1445         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1446         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
1447         mmCOMPUTE_NUM_THREAD_X, 256*5,
1448         mmCOMPUTE_NUM_THREAD_Y, 1,
1449         mmCOMPUTE_NUM_THREAD_Z, 1,
1450         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1451         mmCOMPUTE_PGM_RSRC2, 20,
1452         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1453         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1454         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1455         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1456         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1457         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1458         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1459         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1460         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1461         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1462 };
1463
1464 static const u32 sgpr2_init_regs[] =
1465 {
1466         mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1467         mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1468         mmCOMPUTE_NUM_THREAD_X, 256*5,
1469         mmCOMPUTE_NUM_THREAD_Y, 1,
1470         mmCOMPUTE_NUM_THREAD_Z, 1,
1471         mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
1472         mmCOMPUTE_PGM_RSRC2, 20,
1473         mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1474         mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1475         mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1476         mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1477         mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1478         mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1479         mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1480         mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1481         mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1482         mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1483 };
1484
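/*
 * SEC/DED error counters scattered across the GFX blocks.  Reading them
 * at the end of the workaround below clears whatever was latched while
 * the GPR init shaders ran.
 */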
1485 static const u32 sec_ded_counter_registers[] =
1486 {
1487         mmCPC_EDC_ATC_CNT,
1488         mmCPC_EDC_SCRATCH_CNT,
1489         mmCPC_EDC_UCODE_CNT,
1490         mmCPF_EDC_ATC_CNT,
1491         mmCPF_EDC_ROQ_CNT,
1492         mmCPF_EDC_TAG_CNT,
1493         mmCPG_EDC_ATC_CNT,
1494         mmCPG_EDC_DMA_CNT,
1495         mmCPG_EDC_TAG_CNT,
1496         mmDC_EDC_CSINVOC_CNT,
1497         mmDC_EDC_RESTORE_CNT,
1498         mmDC_EDC_STATE_CNT,
1499         mmGDS_EDC_CNT,
1500         mmGDS_EDC_GRBM_CNT,
1501         mmGDS_EDC_OA_DED,
1502         mmSPI_EDC_CNT,
1503         mmSQC_ATC_EDC_GATCL1_CNT,
1504         mmSQC_EDC_CNT,
1505         mmSQ_EDC_DED_CNT,
1506         mmSQ_EDC_INFO,
1507         mmSQ_EDC_SEC_CNT,
1508         mmTCC_EDC_CNT,
1509         mmTCP_ATC_EDC_GATCL1_CNT,
1510         mmTCP_EDC_CNT,
1511         mmTD_EDC_CNT
1512 };
1513
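/*
 * Carrizo EDC workaround: dispatch the shaders above once at init so
 * that every VGPR and SGPR is written (presumably to avoid spurious ECC
 * errors from never-initialized registers), then enable DED/FED
 * propagation and clear the error counters.
 */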
1514 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1515 {
1516         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1517         struct amdgpu_ib ib;
1518         struct dma_fence *f = NULL;
1519         int r, i;
1520         u32 tmp;
1521         unsigned int total_size, vgpr_offset, sgpr_offset;
1522         u64 gpu_addr;
1523
1524         /* only supported on CZ */
1525         if (adev->asic_type != CHIP_CARRIZO)
1526                 return 0;
1527
1528         /* bail if the compute ring is not ready */
1529         if (!ring->sched.ready)
1530                 return 0;
1531
1532         tmp = RREG32(mmGB_EDC_MODE);
1533         WREG32(mmGB_EDC_MODE, 0);
1534
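        /*
         * IB size per init block: 3 dwords for each SET_SH_REG register
         * pair (header + offset + value), 4 for the COMPUTE_PGM_LO/HI
         * write, 5 for the DISPATCH_DIRECT packet and 2 for the CS
         * partial flush event, times 4 bytes per dword.
         */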
1535         total_size =
1536                 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1537         total_size +=
1538                 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1539         total_size +=
1540                 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1541         total_size = ALIGN(total_size, 256);
1542         vgpr_offset = total_size;
1543         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1544         sgpr_offset = total_size;
1545         total_size += sizeof(sgpr_init_compute_shader);
1546
1547         /* allocate an indirect buffer to put the commands in */
1548         memset(&ib, 0, sizeof(ib));
1549         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1550         if (r) {
1551                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1552                 return r;
1553         }
1554
1555         /* load the compute shaders */
1556         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1557                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1558
1559         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1560                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1561
1562         /* init the ib length to 0 */
1563         ib.length_dw = 0;
1564
1565         /* VGPR */
1566         /* write the register state for the compute dispatch */
1567         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1568                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1569                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1570                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1571         }
1572         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1573         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1574         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1575         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1576         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1577         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1578
1579         /* write dispatch packet */
1580         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1581         ib.ptr[ib.length_dw++] = 8; /* x */
1582         ib.ptr[ib.length_dw++] = 1; /* y */
1583         ib.ptr[ib.length_dw++] = 1; /* z */
1584         ib.ptr[ib.length_dw++] =
1585                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1586
1587         /* write CS partial flush packet */
1588         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1589         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1590
1591         /* SGPR1 */
1592         /* write the register state for the compute dispatch */
1593         for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1594                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1595                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1596                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1597         }
1598         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1599         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1600         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1601         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1602         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1603         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1604
1605         /* write dispatch packet */
1606         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1607         ib.ptr[ib.length_dw++] = 8; /* x */
1608         ib.ptr[ib.length_dw++] = 1; /* y */
1609         ib.ptr[ib.length_dw++] = 1; /* z */
1610         ib.ptr[ib.length_dw++] =
1611                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1612
1613         /* write CS partial flush packet */
1614         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1615         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1616
1617         /* SGPR2 */
1618         /* write the register state for the compute dispatch */
1619         for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1620                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1621                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1622                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1623         }
1624         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1625         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1626         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1627         ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1628         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1629         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1630
1631         /* write dispatch packet */
1632         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1633         ib.ptr[ib.length_dw++] = 8; /* x */
1634         ib.ptr[ib.length_dw++] = 1; /* y */
1635         ib.ptr[ib.length_dw++] = 1; /* z */
1636         ib.ptr[ib.length_dw++] =
1637                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1638
1639         /* write CS partial flush packet */
1640         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1641         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1642
1643         /* schedule the IB on the ring */
1644         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1645         if (r) {
1646                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1647                 goto fail;
1648         }
1649
1650         /* wait for the GPU to finish processing the IB */
1651         r = dma_fence_wait(f, false);
1652         if (r) {
1653                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1654                 goto fail;
1655         }
1656
1657         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1658         tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1659         WREG32(mmGB_EDC_MODE, tmp);
1660
1661         tmp = RREG32(mmCC_GC_EDC_CONFIG);
1662         tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1663         WREG32(mmCC_GC_EDC_CONFIG, tmp);
1664
1665
1666         /* read back registers to clear the counters */
1667         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1668                 RREG32(sec_ded_counter_registers[i]);
1669
1670 fail:
1671         amdgpu_ib_free(adev, &ib, NULL);
1672         dma_fence_put(f);
1673
1674         return r;
1675 }
1676
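/*
 * Fill in the per-ASIC gfx configuration (shader engine/CU topology,
 * scan-converter FIFO sizes, golden GB_ADDR_CONFIG) and derive the
 * memory row size from the MC fuse and arbiter registers.
 */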
1677 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1678 {
1679         u32 gb_addr_config;
1680         u32 mc_shared_chmap, mc_arb_ramcfg;
1681         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1682         u32 tmp;
1683         int ret;
1684
1685         switch (adev->asic_type) {
1686         case CHIP_TOPAZ:
1687                 adev->gfx.config.max_shader_engines = 1;
1688                 adev->gfx.config.max_tile_pipes = 2;
1689                 adev->gfx.config.max_cu_per_sh = 6;
1690                 adev->gfx.config.max_sh_per_se = 1;
1691                 adev->gfx.config.max_backends_per_se = 2;
1692                 adev->gfx.config.max_texture_channel_caches = 2;
1693                 adev->gfx.config.max_gprs = 256;
1694                 adev->gfx.config.max_gs_threads = 32;
1695                 adev->gfx.config.max_hw_contexts = 8;
1696
1697                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1698                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1699                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1700                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1701                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1702                 break;
1703         case CHIP_FIJI:
1704                 adev->gfx.config.max_shader_engines = 4;
1705                 adev->gfx.config.max_tile_pipes = 16;
1706                 adev->gfx.config.max_cu_per_sh = 16;
1707                 adev->gfx.config.max_sh_per_se = 1;
1708                 adev->gfx.config.max_backends_per_se = 4;
1709                 adev->gfx.config.max_texture_channel_caches = 16;
1710                 adev->gfx.config.max_gprs = 256;
1711                 adev->gfx.config.max_gs_threads = 32;
1712                 adev->gfx.config.max_hw_contexts = 8;
1713
1714                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1715                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1716                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1717                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1718                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1719                 break;
1720         case CHIP_POLARIS11:
1721         case CHIP_POLARIS12:
1722                 ret = amdgpu_atombios_get_gfx_info(adev);
1723                 if (ret)
1724                         return ret;
1725                 adev->gfx.config.max_gprs = 256;
1726                 adev->gfx.config.max_gs_threads = 32;
1727                 adev->gfx.config.max_hw_contexts = 8;
1728
1729                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1730                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1731                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1732                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1733                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1734                 break;
1735         case CHIP_POLARIS10:
1736         case CHIP_VEGAM:
1737                 ret = amdgpu_atombios_get_gfx_info(adev);
1738                 if (ret)
1739                         return ret;
1740                 adev->gfx.config.max_gprs = 256;
1741                 adev->gfx.config.max_gs_threads = 32;
1742                 adev->gfx.config.max_hw_contexts = 8;
1743
1744                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1745                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1746                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1747                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1748                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1749                 break;
1750         case CHIP_TONGA:
1751                 adev->gfx.config.max_shader_engines = 4;
1752                 adev->gfx.config.max_tile_pipes = 8;
1753                 adev->gfx.config.max_cu_per_sh = 8;
1754                 adev->gfx.config.max_sh_per_se = 1;
1755                 adev->gfx.config.max_backends_per_se = 2;
1756                 adev->gfx.config.max_texture_channel_caches = 8;
1757                 adev->gfx.config.max_gprs = 256;
1758                 adev->gfx.config.max_gs_threads = 32;
1759                 adev->gfx.config.max_hw_contexts = 8;
1760
1761                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1762                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1763                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1764                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1765                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1766                 break;
1767         case CHIP_CARRIZO:
1768                 adev->gfx.config.max_shader_engines = 1;
1769                 adev->gfx.config.max_tile_pipes = 2;
1770                 adev->gfx.config.max_sh_per_se = 1;
1771                 adev->gfx.config.max_backends_per_se = 2;
1772                 adev->gfx.config.max_cu_per_sh = 8;
1773                 adev->gfx.config.max_texture_channel_caches = 2;
1774                 adev->gfx.config.max_gprs = 256;
1775                 adev->gfx.config.max_gs_threads = 32;
1776                 adev->gfx.config.max_hw_contexts = 8;
1777
1778                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1779                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1780                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1781                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1782                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1783                 break;
1784         case CHIP_STONEY:
1785                 adev->gfx.config.max_shader_engines = 1;
1786                 adev->gfx.config.max_tile_pipes = 2;
1787                 adev->gfx.config.max_sh_per_se = 1;
1788                 adev->gfx.config.max_backends_per_se = 1;
1789                 adev->gfx.config.max_cu_per_sh = 3;
1790                 adev->gfx.config.max_texture_channel_caches = 2;
1791                 adev->gfx.config.max_gprs = 256;
1792                 adev->gfx.config.max_gs_threads = 16;
1793                 adev->gfx.config.max_hw_contexts = 8;
1794
1795                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1796                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1797                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1798                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1799                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1800                 break;
1801         default:
1802                 adev->gfx.config.max_shader_engines = 2;
1803                 adev->gfx.config.max_tile_pipes = 4;
1804                 adev->gfx.config.max_cu_per_sh = 2;
1805                 adev->gfx.config.max_sh_per_se = 1;
1806                 adev->gfx.config.max_backends_per_se = 2;
1807                 adev->gfx.config.max_texture_channel_caches = 4;
1808                 adev->gfx.config.max_gprs = 256;
1809                 adev->gfx.config.max_gs_threads = 32;
1810                 adev->gfx.config.max_hw_contexts = 8;
1811
1812                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1813                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1814                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1815                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1816                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1817                 break;
1818         }
1819
1820         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1821         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1822         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1823
1824         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1825         adev->gfx.config.mem_max_burst_length_bytes = 256;
1826         if (adev->flags & AMD_IS_APU) {
1827                 /* Get memory bank mapping mode. */
1828                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1829                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1830                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1831
1832                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1833                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1834                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1835
1836                 /* Validate settings in case only one DIMM is installed. */
1837                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1838                         dimm00_addr_map = 0;
1839                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1840                         dimm01_addr_map = 0;
1841                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1842                         dimm10_addr_map = 0;
1843                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1844                         dimm11_addr_map = 0;
1845
1846                 /* If any DIMM address map is 8GB, ROW size should be 2KB; otherwise 1KB. */
1847                 /* If ROW size(DIMM1) != ROW size(DIMM0), use the larger of the two. */
1848                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1849                         adev->gfx.config.mem_row_size_in_kb = 2;
1850                 else
1851                         adev->gfx.config.mem_row_size_in_kb = 1;
1852         } else {
1853                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1854                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1855                 if (adev->gfx.config.mem_row_size_in_kb > 4)
1856                         adev->gfx.config.mem_row_size_in_kb = 4;
1857         }
1858
1859         adev->gfx.config.shader_engine_tile_size = 32;
1860         adev->gfx.config.num_gpus = 1;
1861         adev->gfx.config.multi_gpu_tile_size = 64;
1862
1863         /* fix up row size */
1864         switch (adev->gfx.config.mem_row_size_in_kb) {
1865         case 1:
1866         default:
1867                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1868                 break;
1869         case 2:
1870                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1871                 break;
1872         case 4:
1873                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1874                 break;
1875         }
1876         adev->gfx.config.gb_addr_config = gb_addr_config;
1877
1878         return 0;
1879 }
1880
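/*
 * Initialize one compute ring for the given MEC/pipe/queue triple.  The
 * ring is doorbell driven and its EOP area is carved out of the shared
 * HPD/EOP buffer allocated in gfx_v8_0_mec_init().
 */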
1881 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1882                                         int mec, int pipe, int queue)
1883 {
1884         int r;
1885         unsigned int irq_type;
1886         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1889
1890         /* mec0 is me1 */
1891         ring->me = mec + 1;
1892         ring->pipe = pipe;
1893         ring->queue = queue;
1894
1895         ring->ring_obj = NULL;
1896         ring->use_doorbell = true;
1897         ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id;
1898         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1899                                 + (ring_id * GFX8_MEC_HPD_SIZE);
1900         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1901
1902         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1903                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1904                 + ring->pipe;
1905
1906         /* type-2 packets are deprecated on MEC, use type-3 instead */
1907         r = amdgpu_ring_init(adev, ring, 1024,
1908                         &adev->gfx.eop_irq, irq_type);
1909         if (r)
1910                 return r;
1911
1912
1913         return 0;
1914 }
1915
1916 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);
1917
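/*
 * Software init: hook up the GFX interrupt sources, load the microcode,
 * create the RLC, MEC and KIQ objects, and initialize the gfx ring, the
 * enabled compute rings and their MQD backing store.
 */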
1918 static int gfx_v8_0_sw_init(void *handle)
1919 {
1920         int i, j, k, r, ring_id;
1921         struct amdgpu_ring *ring;
1922         struct amdgpu_kiq *kiq;
1923         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1924
1925         switch (adev->asic_type) {
1926         case CHIP_TONGA:
1927         case CHIP_CARRIZO:
1928         case CHIP_FIJI:
1929         case CHIP_POLARIS10:
1930         case CHIP_POLARIS11:
1931         case CHIP_POLARIS12:
1932         case CHIP_VEGAM:
1933                 adev->gfx.mec.num_mec = 2;
1934                 break;
1935         case CHIP_TOPAZ:
1936         case CHIP_STONEY:
1937         default:
1938                 adev->gfx.mec.num_mec = 1;
1939                 break;
1940         }
1941
1942         adev->gfx.mec.num_pipe_per_mec = 4;
1943         adev->gfx.mec.num_queue_per_pipe = 8;
1944
1945         /* EOP Event */
1946         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
1947         if (r)
1948                 return r;
1949
1950         /* Privileged reg */
1951         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
1952                               &adev->gfx.priv_reg_irq);
1953         if (r)
1954                 return r;
1955
1956         /* Privileged inst */
1957         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
1958                               &adev->gfx.priv_inst_irq);
1959         if (r)
1960                 return r;
1961
1962         /* Add CP EDC/ECC irq  */
1963         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
1964                               &adev->gfx.cp_ecc_error_irq);
1965         if (r)
1966                 return r;
1967
1968         /* SQ interrupts. */
1969         r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
1970                               &adev->gfx.sq_irq);
1971         if (r) {
1972                 DRM_ERROR("amdgpu_irq_add_id() for SQ failed: %d\n", r);
1973                 return r;
1974         }
1975
1976         INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
1977
1978         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1979
1980         gfx_v8_0_scratch_init(adev);
1981
1982         r = gfx_v8_0_init_microcode(adev);
1983         if (r) {
1984                 DRM_ERROR("Failed to load gfx firmware!\n");
1985                 return r;
1986         }
1987
1988         r = adev->gfx.rlc.funcs->init(adev);
1989         if (r) {
1990                 DRM_ERROR("Failed to init rlc BOs!\n");
1991                 return r;
1992         }
1993
1994         r = gfx_v8_0_mec_init(adev);
1995         if (r) {
1996                 DRM_ERROR("Failed to init MEC BOs!\n");
1997                 return r;
1998         }
1999
2000         /* set up the gfx ring */
2001         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2002                 ring = &adev->gfx.gfx_ring[i];
2003                 ring->ring_obj = NULL;
2004                 sprintf(ring->name, "gfx");
2005                 /* no gfx doorbells on iceland */
2006                 if (adev->asic_type != CHIP_TOPAZ) {
2007                         ring->use_doorbell = true;
2008                         ring->doorbell_index = adev->doorbell_index.gfx_ring0;
2009                 }
2010
2011                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2012                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2013                 if (r)
2014                         return r;
2015         }
2016
2017
2018         /* set up the compute queues - allocate horizontally across pipes */
2019         ring_id = 0;
2020         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2021                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2022                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2023                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2024                                         continue;
2025
2026                                 r = gfx_v8_0_compute_ring_init(adev,
2027                                                                 ring_id,
2028                                                                 i, k, j);
2029                                 if (r)
2030                                         return r;
2031
2032                                 ring_id++;
2033                         }
2034                 }
2035         }
2036
2037         r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
2038         if (r) {
2039                 DRM_ERROR("Failed to init KIQ BOs!\n");
2040                 return r;
2041         }
2042
2043         kiq = &adev->gfx.kiq;
2044         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2045         if (r)
2046                 return r;
2047
2048         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2049         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2050         if (r)
2051                 return r;
2052
2053         adev->gfx.ce_ram_size = 0x8000;
2054
2055         r = gfx_v8_0_gpu_early_init(adev);
2056         if (r)
2057                 return r;
2058
2059         return 0;
2060 }
2061
2062 static int gfx_v8_0_sw_fini(void *handle)
2063 {
2064         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2065         int i;
2066
2067         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2068                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2069         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2070                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2071
2072         amdgpu_gfx_mqd_sw_fini(adev);
2073         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2074         amdgpu_gfx_kiq_fini(adev);
2075
2076         gfx_v8_0_mec_fini(adev);
2077         amdgpu_gfx_rlc_fini(adev);
2078         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2079                                 &adev->gfx.rlc.clear_state_gpu_addr,
2080                                 (void **)&adev->gfx.rlc.cs_ptr);
2081         if ((adev->asic_type == CHIP_CARRIZO) ||
2082             (adev->asic_type == CHIP_STONEY)) {
2083                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2084                                 &adev->gfx.rlc.cp_table_gpu_addr,
2085                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2086         }
2087         gfx_v8_0_free_microcode(adev);
2088
2089         return 0;
2090 }
2091
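/*
 * Program the GB_TILE_MODE and GB_MACROTILE_MODE tables with per-ASIC
 * golden values.  Indices the write loops skip (e.g. 7, 12, 17 and 23
 * on Topaz) are simply never written.
 */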
2092 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2093 {
2094         uint32_t *modearray, *mod2array;
2095         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2096         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2097         u32 reg_offset;
2098
2099         modearray = adev->gfx.config.tile_mode_array;
2100         mod2array = adev->gfx.config.macrotile_mode_array;
2101
2102         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2103                 modearray[reg_offset] = 0;
2104
2105         for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2106                 mod2array[reg_offset] = 0;
2107
2108         switch (adev->asic_type) {
2109         case CHIP_TOPAZ:
2110                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2111                                 PIPE_CONFIG(ADDR_SURF_P2) |
2112                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2113                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2114                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2115                                 PIPE_CONFIG(ADDR_SURF_P2) |
2116                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2117                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2118                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2119                                 PIPE_CONFIG(ADDR_SURF_P2) |
2120                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2121                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2122                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2123                                 PIPE_CONFIG(ADDR_SURF_P2) |
2124                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2125                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2126                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2127                                 PIPE_CONFIG(ADDR_SURF_P2) |
2128                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2129                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2130                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2131                                 PIPE_CONFIG(ADDR_SURF_P2) |
2132                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2133                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2134                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2135                                 PIPE_CONFIG(ADDR_SURF_P2) |
2136                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2137                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2138                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2139                                 PIPE_CONFIG(ADDR_SURF_P2));
2140                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2141                                 PIPE_CONFIG(ADDR_SURF_P2) |
2142                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2143                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2144                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2145                                  PIPE_CONFIG(ADDR_SURF_P2) |
2146                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2147                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2148                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2149                                  PIPE_CONFIG(ADDR_SURF_P2) |
2150                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2151                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2152                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2153                                  PIPE_CONFIG(ADDR_SURF_P2) |
2154                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2155                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2156                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2157                                  PIPE_CONFIG(ADDR_SURF_P2) |
2158                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2159                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2160                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2161                                  PIPE_CONFIG(ADDR_SURF_P2) |
2162                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2163                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2164                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2165                                  PIPE_CONFIG(ADDR_SURF_P2) |
2166                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2167                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2168                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2169                                  PIPE_CONFIG(ADDR_SURF_P2) |
2170                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2171                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2172                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2173                                  PIPE_CONFIG(ADDR_SURF_P2) |
2174                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2175                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2176                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2177                                  PIPE_CONFIG(ADDR_SURF_P2) |
2178                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2179                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2180                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2181                                  PIPE_CONFIG(ADDR_SURF_P2) |
2182                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2183                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2184                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2185                                  PIPE_CONFIG(ADDR_SURF_P2) |
2186                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2187                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2188                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2189                                  PIPE_CONFIG(ADDR_SURF_P2) |
2190                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2191                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2192                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2193                                  PIPE_CONFIG(ADDR_SURF_P2) |
2194                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2195                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2196                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2197                                  PIPE_CONFIG(ADDR_SURF_P2) |
2198                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2199                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2200                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2201                                  PIPE_CONFIG(ADDR_SURF_P2) |
2202                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2203                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2204                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2205                                  PIPE_CONFIG(ADDR_SURF_P2) |
2206                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2207                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2208                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2209                                  PIPE_CONFIG(ADDR_SURF_P2) |
2210                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2211                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2212
2213                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2214                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2215                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2216                                 NUM_BANKS(ADDR_SURF_8_BANK));
2217                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2218                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2219                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2220                                 NUM_BANKS(ADDR_SURF_8_BANK));
2221                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2222                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2223                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2224                                 NUM_BANKS(ADDR_SURF_8_BANK));
2225                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2226                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2227                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2228                                 NUM_BANKS(ADDR_SURF_8_BANK));
2229                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2230                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2231                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2232                                 NUM_BANKS(ADDR_SURF_8_BANK));
2233                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2234                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2235                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2236                                 NUM_BANKS(ADDR_SURF_8_BANK));
2237                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2239                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2240                                 NUM_BANKS(ADDR_SURF_8_BANK));
2241                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2242                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2243                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2244                                 NUM_BANKS(ADDR_SURF_16_BANK));
2245                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2246                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2247                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2248                                 NUM_BANKS(ADDR_SURF_16_BANK));
2249                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2250                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2251                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2252                                  NUM_BANKS(ADDR_SURF_16_BANK));
2253                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2254                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2255                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2256                                  NUM_BANKS(ADDR_SURF_16_BANK));
2257                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2258                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2259                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2260                                  NUM_BANKS(ADDR_SURF_16_BANK));
2261                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2264                                  NUM_BANKS(ADDR_SURF_16_BANK));
2265                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2266                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2267                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2268                                  NUM_BANKS(ADDR_SURF_8_BANK));
2269
2270                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2271                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2272                             reg_offset != 23)
2273                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2274
2275                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2276                         if (reg_offset != 7)
2277                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2278
2279                 break;
2280         case CHIP_FIJI:
2281         case CHIP_VEGAM:
2282                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2283                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2284                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2285                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2286                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2287                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2288                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2289                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2290                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2291                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2292                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2293                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2294                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2296                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2297                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2298                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2299                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2300                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2301                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2302                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2303                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2304                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2305                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2306                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2307                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2308                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2309                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2310                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2311                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2312                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2313                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2314                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2315                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2316                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2317                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2318                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2319                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2320                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2321                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2322                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2323                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2324                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2325                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2326                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2327                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2328                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2329                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2330                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2331                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2332                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2333                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2334                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2335                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2336                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2340                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2341                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2344                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2345                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2346                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2347                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2348                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2349                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2350                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2351                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2352                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2353                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2355                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2356                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2357                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2359                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2360                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2361                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2363                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2364                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2365                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2367                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2368                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2369                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2371                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2372                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2373                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2374                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2375                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2376                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2377                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2379                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2380                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2381                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2382                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2383                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2384                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2385                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2387                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2388                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2389                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2391                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2392                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2393                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2395                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2396                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2397                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2399                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2400                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2401                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2402                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2403                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2404
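                     /*
                      * GB_MACROTILE_MODE bank width/height/aspect settings.
                      * Index 7 is intentionally left unset; the write-out
                      * loop below skips it as well.
                      */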
2405                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2406                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2407                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2408                                 NUM_BANKS(ADDR_SURF_8_BANK));
2409                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2410                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2411                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2412                                 NUM_BANKS(ADDR_SURF_8_BANK));
2413                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2414                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2415                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2416                                 NUM_BANKS(ADDR_SURF_8_BANK));
2417                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2418                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2419                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2420                                 NUM_BANKS(ADDR_SURF_8_BANK));
2421                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2423                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2424                                 NUM_BANKS(ADDR_SURF_8_BANK));
2425                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2426                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2427                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2428                                 NUM_BANKS(ADDR_SURF_8_BANK));
2429                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2431                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2432                                 NUM_BANKS(ADDR_SURF_8_BANK));
2433                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2435                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2436                                 NUM_BANKS(ADDR_SURF_8_BANK));
2437                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2438                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2439                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2440                                 NUM_BANKS(ADDR_SURF_8_BANK));
2441                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2443                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2444                                  NUM_BANKS(ADDR_SURF_8_BANK));
2445                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2447                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2448                                  NUM_BANKS(ADDR_SURF_8_BANK));
2449                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2451                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452                                  NUM_BANKS(ADDR_SURF_8_BANK));
2453                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2456                                  NUM_BANKS(ADDR_SURF_8_BANK));
2457                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460                                  NUM_BANKS(ADDR_SURF_4_BANK));
2461
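                     /*
                      * Unlike the ASIC above, the tile-mode write-out has no
                      * skips here; only macrotile index 7 is left at its
                      * reset value.
                      */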
2462                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2463                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2464
2465                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2466                         if (reg_offset != 7)
2467                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2468
2469                 break;
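             /*
              * Tonga: same table structure as Fiji/VegaM, but built on the
              * 8-pipe ADDR_SURF_P8_32x32_16x16 configuration.
              */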
2470         case CHIP_TONGA:
2471                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2472                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2473                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2474                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2475                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2476                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2477                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2478                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2479                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2480                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2481                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2482                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2483                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2484                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2485                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2486                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2487                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2488                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2489                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2490                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2491                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2492                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2493                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2494                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2495                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2496                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2497                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2498                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2499                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2500                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2501                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2502                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2503                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2504                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2505                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2506                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2507                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2508                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2509                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2510                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2511                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2512                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2513                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2514                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2516                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2517                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2518                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2519                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2520                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2521                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2522                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2523                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2524                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2525                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2527                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2528                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2529                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2530                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2531                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2532                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2533                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2534                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2536                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2537                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2538                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2539                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2540                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2541                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2542                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2544                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2545                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2546                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2548                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2549                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2550                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2552                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2553                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2554                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2556                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2557                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2558                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2560                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2561                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2562                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2563                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2564                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2565                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2566                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2568                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2569                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2570                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2572                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2573                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2574                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2576                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2577                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2578                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2580                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2581                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2582                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2584                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2585                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2586                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2588                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2589                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2590                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2591                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2592                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2593
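                     /*
                      * Tonga macrotile settings use 16 banks for most
                      * entries, dropping to 8 and then 4 banks for the final
                      * ones; index 7 is again unused.
                      */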
2594                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2595                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2596                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2597                                 NUM_BANKS(ADDR_SURF_16_BANK));
2598                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2599                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2600                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2601                                 NUM_BANKS(ADDR_SURF_16_BANK));
2602                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2603                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2604                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2605                                 NUM_BANKS(ADDR_SURF_16_BANK));
2606                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2607                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2608                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2609                                 NUM_BANKS(ADDR_SURF_16_BANK));
2610                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2611                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2612                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2613                                 NUM_BANKS(ADDR_SURF_16_BANK));
2614                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2615                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2616                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2617                                 NUM_BANKS(ADDR_SURF_16_BANK));
2618                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2620                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2621                                 NUM_BANKS(ADDR_SURF_16_BANK));
2622                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2623                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2624                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2625                                 NUM_BANKS(ADDR_SURF_16_BANK));
2626                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2627                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2628                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2629                                 NUM_BANKS(ADDR_SURF_16_BANK));
2630                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2632                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2633                                  NUM_BANKS(ADDR_SURF_16_BANK));
2634                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2636                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2637                                  NUM_BANKS(ADDR_SURF_16_BANK));
2638                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2640                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2641                                  NUM_BANKS(ADDR_SURF_8_BANK));
2642                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2644                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2645                                  NUM_BANKS(ADDR_SURF_4_BANK));
2646                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2648                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2649                                  NUM_BANKS(ADDR_SURF_4_BANK));
2650
2651                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2652                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2653
2654                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2655                         if (reg_offset != 7)
2656                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2657
2658                 break;
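             /*
              * Polaris11 and Polaris12 run everything on the 4-pipe
              * ADDR_SURF_P4_16x16 configuration, so the PRT modes need no
              * separate pipe-config fallback entries.
              */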
2659         case CHIP_POLARIS11:
2660         case CHIP_POLARIS12:
2661                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2664                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2665                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2667                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2668                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2669                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2671                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2672                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2673                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2675                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2676                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2677                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2678                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2680                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2681                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2682                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2684                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2685                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2686                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2688                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2689                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2690                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2692                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2693                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2694                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2695                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2696                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2697                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2698                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2699                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2701                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2702                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2704                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2705                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2706                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2707                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2708                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2710                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2711                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2712                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2716                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2717                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2718                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2719                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2720                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2722                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2723                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2724                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2725                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2726                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2727                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2729                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2730                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2731                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2732                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2733                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2734                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2735                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2736                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2738                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2739                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2740                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2742                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2743                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2744                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2746                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2747                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2748                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2750                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2751                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2752                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2754                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2755                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2756                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2758                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2759                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2760                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2762                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2763                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2764                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2766                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2767                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2768                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2770                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2771                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2772                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2774                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2775                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2776                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2778                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2779                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2780                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2782                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2783
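                     /* Macrotile bank settings for the 4-pipe Polaris11/12 layout. */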
2784                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2785                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2786                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2787                                 NUM_BANKS(ADDR_SURF_16_BANK));
2788
2789                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2790                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2791                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2792                                 NUM_BANKS(ADDR_SURF_16_BANK));
2793
2794                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2795                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2796                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2797                                 NUM_BANKS(ADDR_SURF_16_BANK));
2798
2799                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2800                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2801                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2802                                 NUM_BANKS(ADDR_SURF_16_BANK));
2803
2804                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2806                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2807                                 NUM_BANKS(ADDR_SURF_16_BANK));
2808
2809                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2810                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2811                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2812                                 NUM_BANKS(ADDR_SURF_16_BANK));
2813
2814                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2815                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2816                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2817                                 NUM_BANKS(ADDR_SURF_16_BANK));
2818
2819                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2820                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2821                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2822                                 NUM_BANKS(ADDR_SURF_16_BANK));
2823
2824                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2825                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2826                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2827                                 NUM_BANKS(ADDR_SURF_16_BANK));
2828
2829                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2831                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2832                                 NUM_BANKS(ADDR_SURF_16_BANK));
2833
2834                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2835                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2836                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2837                                 NUM_BANKS(ADDR_SURF_16_BANK));
2838
2839                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2840                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2841                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2842                                 NUM_BANKS(ADDR_SURF_16_BANK));
2843
2844                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2847                                 NUM_BANKS(ADDR_SURF_8_BANK));
2848
2849                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2851                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2852                                 NUM_BANKS(ADDR_SURF_4_BANK));
2853
2854                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2855                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2856
2857                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2858                         if (reg_offset != 7)
2859                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2860
2861                 break;
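             /*
              * Polaris10 appears to mirror the Tonga table: 8-pipe
              * ADDR_SURF_P8_32x32_16x16 with ADDR_SURF_P4_16x16 PRT
              * fallbacks and near-identical macrotile settings.
              */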
2862         case CHIP_POLARIS10:
2863                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2864                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2865                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2866                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2867                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2868                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2869                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2870                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2871                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2872                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2873                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2874                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2875                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2877                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2878                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2879                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2880                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2881                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2882                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2883                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2884                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2885                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2886                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2887                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2888                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2889                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2890                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2892                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2893                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2894                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2895                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2896                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2897                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2898                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2899                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2900                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2901                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2902                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2903                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2904                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2905                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2906                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2907                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2908                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2909                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2910                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2911                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2912                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2913                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2914                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2915                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2916                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2917                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2919                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2920                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2922                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2923                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2924                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2926                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2927                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2928                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2929                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2931                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2932                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2933                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2934                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2935                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2936                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2937                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2938                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2940                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2941                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2942                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2944                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2945                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2946                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2948                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2949                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2950                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2951                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2952                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2953                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2954                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2955                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2956                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2957                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2958                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2961                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2962                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2964                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2965                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2966                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2968                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2969                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2970                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2971                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2972                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2973                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2974                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2976                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2977                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2978                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2979                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2980                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2981                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2982                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2983                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2984                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2985
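                     /* Essentially the same macrotile progression as Tonga. */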
2986                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2987                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2988                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2989                                 NUM_BANKS(ADDR_SURF_16_BANK));
2990
2991                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2993                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994                                 NUM_BANKS(ADDR_SURF_16_BANK));
2995
2996                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2997                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2998                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999                                 NUM_BANKS(ADDR_SURF_16_BANK));
3000
3001                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3002                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3003                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3004                                 NUM_BANKS(ADDR_SURF_16_BANK));
3005
3006                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3007                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3008                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3009                                 NUM_BANKS(ADDR_SURF_16_BANK));
3010
3011                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3012                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3013                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3014                                 NUM_BANKS(ADDR_SURF_16_BANK));
3015
3016                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3017                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3018                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3019                                 NUM_BANKS(ADDR_SURF_16_BANK));
3020
3021                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3022                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3023                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3024                                 NUM_BANKS(ADDR_SURF_16_BANK));
3025
3026                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3027                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3028                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3029                                 NUM_BANKS(ADDR_SURF_16_BANK));
3030
3031                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3032                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3033                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3034                                 NUM_BANKS(ADDR_SURF_16_BANK));
3035
3036                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3038                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3039                                 NUM_BANKS(ADDR_SURF_16_BANK));
3040
3041                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3042                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3043                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3044                                 NUM_BANKS(ADDR_SURF_8_BANK));
3045
3046                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3047                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3048                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3049                                 NUM_BANKS(ADDR_SURF_4_BANK));
3050
3051                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3052                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3053                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3054                                 NUM_BANKS(ADDR_SURF_4_BANK));
3055
3056                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3057                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3058
3059                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3060                         if (reg_offset != 7)
3061                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3062
3063                 break;
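             /*
              * Stoney is a small 2-pipe APU, so every entry uses
              * ADDR_SURF_P2.  Indices 7 and 12 are left unprogrammed here,
              * presumably matching a skip list in the write-out loop as on
              * the other small parts.
              */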
3064         case CHIP_STONEY:
3065                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3066                                 PIPE_CONFIG(ADDR_SURF_P2) |
3067                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3068                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3069                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3070                                 PIPE_CONFIG(ADDR_SURF_P2) |
3071                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3072                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3073                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3074                                 PIPE_CONFIG(ADDR_SURF_P2) |
3075                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3076                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3077                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3078                                 PIPE_CONFIG(ADDR_SURF_P2) |
3079                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3080                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3081                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3082                                 PIPE_CONFIG(ADDR_SURF_P2) |
3083                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3084                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3085                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3086                                 PIPE_CONFIG(ADDR_SURF_P2) |
3087                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3088                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3089                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3090                                 PIPE_CONFIG(ADDR_SURF_P2) |
3091                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3092                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3093                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3094                                 PIPE_CONFIG(ADDR_SURF_P2));
3095                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3096                                 PIPE_CONFIG(ADDR_SURF_P2) |
3097                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3098                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3099                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3100                                  PIPE_CONFIG(ADDR_SURF_P2) |
3101                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3102                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3103                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3104                                  PIPE_CONFIG(ADDR_SURF_P2) |
3105                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3106                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3107                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3108                                  PIPE_CONFIG(ADDR_SURF_P2) |
3109                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3110                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3111                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3112                                  PIPE_CONFIG(ADDR_SURF_P2) |
3113                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3114                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3115                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3116                                  PIPE_CONFIG(ADDR_SURF_P2) |
3117                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3118                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3119                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3120                                  PIPE_CONFIG(ADDR_SURF_P2) |
3121                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3122                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3123                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3124                                  PIPE_CONFIG(ADDR_SURF_P2) |
3125                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3126                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3127                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3128                                  PIPE_CONFIG(ADDR_SURF_P2) |
3129                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3130                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3131                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3132                                  PIPE_CONFIG(ADDR_SURF_P2) |
3133                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3134                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3135                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3136                                  PIPE_CONFIG(ADDR_SURF_P2) |
3137                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3138                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3139                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3140                                  PIPE_CONFIG(ADDR_SURF_P2) |
3141                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3142                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3143                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3144                                  PIPE_CONFIG(ADDR_SURF_P2) |
3145                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3146                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3147                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3148                                  PIPE_CONFIG(ADDR_SURF_P2) |
3149                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3150                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3151                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3152                                  PIPE_CONFIG(ADDR_SURF_P2) |
3153                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3154                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3155                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3156                                  PIPE_CONFIG(ADDR_SURF_P2) |
3157                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3158                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3159                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3160                                  PIPE_CONFIG(ADDR_SURF_P2) |
3161                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3162                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3163                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3164                                  PIPE_CONFIG(ADDR_SURF_P2) |
3165                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3166                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3167
3168                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3169                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3170                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3171                                 NUM_BANKS(ADDR_SURF_8_BANK));
3172                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3173                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3174                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3175                                 NUM_BANKS(ADDR_SURF_8_BANK));
3176                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3177                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3178                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3179                                 NUM_BANKS(ADDR_SURF_8_BANK));
3180                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3181                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3182                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3183                                 NUM_BANKS(ADDR_SURF_8_BANK));
3184                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3185                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3186                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3187                                 NUM_BANKS(ADDR_SURF_8_BANK));
3188                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3189                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3190                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3191                                 NUM_BANKS(ADDR_SURF_8_BANK));
3192                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3193                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3194                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3195                                 NUM_BANKS(ADDR_SURF_8_BANK));
3196                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3197                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3198                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3199                                 NUM_BANKS(ADDR_SURF_16_BANK));
3200                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3201                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3202                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3203                                 NUM_BANKS(ADDR_SURF_16_BANK));
3204                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3205                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3206                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3207                                  NUM_BANKS(ADDR_SURF_16_BANK));
3208                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3209                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3210                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3211                                  NUM_BANKS(ADDR_SURF_16_BANK));
3212                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3214                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3215                                  NUM_BANKS(ADDR_SURF_16_BANK));
3216                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3217                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3218                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3219                                  NUM_BANKS(ADDR_SURF_16_BANK));
3220                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3221                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3222                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3223                                  NUM_BANKS(ADDR_SURF_8_BANK));
3224
3225                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3226                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3227                             reg_offset != 23)
3228                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3229
3230                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3231                         if (reg_offset != 7)
3232                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3233
3234                 break;
3235         default:
3236                 dev_warn(adev->dev,
3237                          "Unknown chip type (%d) in gfx_v8_0_tiling_mode_table_init(), falling through to CHIP_CARRIZO\n",
3238                          adev->asic_type);
3239                 /* fall through */
3240
3241         case CHIP_CARRIZO:
3242                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3243                                 PIPE_CONFIG(ADDR_SURF_P2) |
3244                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3245                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3246                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3247                                 PIPE_CONFIG(ADDR_SURF_P2) |
3248                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3249                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3250                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3251                                 PIPE_CONFIG(ADDR_SURF_P2) |
3252                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3253                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3254                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255                                 PIPE_CONFIG(ADDR_SURF_P2) |
3256                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3257                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3258                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3259                                 PIPE_CONFIG(ADDR_SURF_P2) |
3260                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3261                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3262                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3263                                 PIPE_CONFIG(ADDR_SURF_P2) |
3264                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3265                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3266                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3267                                 PIPE_CONFIG(ADDR_SURF_P2) |
3268                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3269                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3270                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3271                                 PIPE_CONFIG(ADDR_SURF_P2));
3272                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3273                                 PIPE_CONFIG(ADDR_SURF_P2) |
3274                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3275                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3276                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3277                                  PIPE_CONFIG(ADDR_SURF_P2) |
3278                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3279                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3280                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3281                                  PIPE_CONFIG(ADDR_SURF_P2) |
3282                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3283                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3284                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3285                                  PIPE_CONFIG(ADDR_SURF_P2) |
3286                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3287                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3288                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3289                                  PIPE_CONFIG(ADDR_SURF_P2) |
3290                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3291                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3292                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3293                                  PIPE_CONFIG(ADDR_SURF_P2) |
3294                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3295                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3296                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3297                                  PIPE_CONFIG(ADDR_SURF_P2) |
3298                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3299                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3300                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3301                                  PIPE_CONFIG(ADDR_SURF_P2) |
3302                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3303                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3304                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3305                                  PIPE_CONFIG(ADDR_SURF_P2) |
3306                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3307                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3308                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3309                                  PIPE_CONFIG(ADDR_SURF_P2) |
3310                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3311                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3312                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3313                                  PIPE_CONFIG(ADDR_SURF_P2) |
3314                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3315                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3316                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3317                                  PIPE_CONFIG(ADDR_SURF_P2) |
3318                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3319                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3320                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3321                                  PIPE_CONFIG(ADDR_SURF_P2) |
3322                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3323                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3324                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3325                                  PIPE_CONFIG(ADDR_SURF_P2) |
3326                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3327                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3328                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3329                                  PIPE_CONFIG(ADDR_SURF_P2) |
3330                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3331                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3332                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3333                                  PIPE_CONFIG(ADDR_SURF_P2) |
3334                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3335                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3336                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3337                                  PIPE_CONFIG(ADDR_SURF_P2) |
3338                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3339                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3340                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3341                                  PIPE_CONFIG(ADDR_SURF_P2) |
3342                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3343                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3344
3345                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3346                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3347                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3348                                 NUM_BANKS(ADDR_SURF_8_BANK));
3349                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3350                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3351                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3352                                 NUM_BANKS(ADDR_SURF_8_BANK));
3353                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3354                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3355                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3356                                 NUM_BANKS(ADDR_SURF_8_BANK));
3357                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3358                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3359                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3360                                 NUM_BANKS(ADDR_SURF_8_BANK));
3361                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3362                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3363                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3364                                 NUM_BANKS(ADDR_SURF_8_BANK));
3365                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3366                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3367                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3368                                 NUM_BANKS(ADDR_SURF_8_BANK));
3369                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3370                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3371                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3372                                 NUM_BANKS(ADDR_SURF_8_BANK));
3373                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3374                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3375                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3376                                 NUM_BANKS(ADDR_SURF_16_BANK));
3377                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3378                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3379                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3380                                 NUM_BANKS(ADDR_SURF_16_BANK));
3381                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3382                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3383                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3384                                  NUM_BANKS(ADDR_SURF_16_BANK));
3385                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3386                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3387                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3388                                  NUM_BANKS(ADDR_SURF_16_BANK));
3389                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3390                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3391                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3392                                  NUM_BANKS(ADDR_SURF_16_BANK));
3393                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3394                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3395                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3396                                  NUM_BANKS(ADDR_SURF_16_BANK));
3397                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3398                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3399                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3400                                  NUM_BANKS(ADDR_SURF_8_BANK));
3401
3402                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3403                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3404                             reg_offset != 23)
3405                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3406
3407                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3408                         if (reg_offset != 7)
3409                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3410
3411                 break;
3412         }
3413 }
3414
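/*
 * Steer subsequent register accesses via GRBM_GFX_INDEX to one shader
 * engine (SE), shader array (SH) and instance, or broadcast them.  By
 * convention throughout this file, passing 0xffffffff for an argument
 * selects broadcast mode for that field.
 */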
3415 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3416                                   u32 se_num, u32 sh_num, u32 instance)
3417 {
3418         u32 data;
3419
3420         if (instance == 0xffffffff)
3421                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3422         else
3423                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3424
3425         if (se_num == 0xffffffff)
3426                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3427         else
3428                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3429
3430         if (sh_num == 0xffffffff)
3431                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3432         else
3433                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3434
3435         WREG32(mmGRBM_GFX_INDEX, data);
3436 }
3437
3438 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
3439                                   u32 me, u32 pipe, u32 q, u32 vm)
3440 {
3441         vi_srbm_select(adev, me, pipe, q, vm);
3442 }
3443
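/*
 * Return the bitmap of render backends (RBs) that are active for the
 * currently selected SE/SH: OR the fused-off (CC) and user-disabled
 * (GC_USER) masks together, invert, and clamp to the
 * backends-per-shader-array width.
 */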
3444 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3445 {
3446         u32 data, mask;
3447
3448         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3449                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3450
3451         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3452
3453         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
3454                                          adev->gfx.config.max_sh_per_se);
3455
3456         return (~data) & mask;
3457 }
3458
3459 static void
3460 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3461 {
3462         switch (adev->asic_type) {
3463         case CHIP_FIJI:
3464         case CHIP_VEGAM:
3465                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3466                           RB_XSEL2(1) | PKR_MAP(2) |
3467                           PKR_XSEL(1) | PKR_YSEL(1) |
3468                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3469                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3470                            SE_PAIR_YSEL(2);
3471                 break;
3472         case CHIP_TONGA:
3473         case CHIP_POLARIS10:
3474                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3475                           SE_XSEL(1) | SE_YSEL(1);
3476                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3477                            SE_PAIR_YSEL(2);
3478                 break;
3479         case CHIP_TOPAZ:
3480         case CHIP_CARRIZO:
3481                 *rconf |= RB_MAP_PKR0(2);
3482                 *rconf1 |= 0x0;
3483                 break;
3484         case CHIP_POLARIS11:
3485         case CHIP_POLARIS12:
3486                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3487                           SE_XSEL(1) | SE_YSEL(1);
3488                 *rconf1 |= 0x0;
3489                 break;
3490         case CHIP_STONEY:
3491                 *rconf |= 0x0;
3492                 *rconf1 |= 0x0;
3493                 break;
3494         default:
3495                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3496                 break;
3497         }
3498 }
3499
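/*
 * On harvested parts some RBs are fused off, so the golden raster
 * config above cannot be used unmodified.  Remap the SE/PKR/RB map
 * fields so that work is only routed to backends present in @rb_mask,
 * and program the result into each SE's PA_SC_RASTER_CONFIG.
 */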
3500 static void
3501 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
3502                                         u32 raster_config, u32 raster_config_1,
3503                                         unsigned rb_mask, unsigned num_rb)
3504 {
3505         unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
3506         unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
3507         unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
3508         unsigned rb_per_se = num_rb / num_se;
3509         unsigned se_mask[4];
3510         unsigned se;
3511
3512         se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
3513         se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
3514         se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
3515         se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;
3516
3517         WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
3518         WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
3519         WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));
3520
3521         if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
3522                              (!se_mask[2] && !se_mask[3]))) {
3523                 raster_config_1 &= ~SE_PAIR_MAP_MASK;
3524
3525                 if (!se_mask[0] && !se_mask[1]) {
3526                         raster_config_1 |=
3527                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
3528                 } else {
3529                         raster_config_1 |=
3530                                 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
3531                 }
3532         }
3533
3534         for (se = 0; se < num_se; se++) {
3535                 unsigned raster_config_se = raster_config;
3536                 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
3537                 unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
3538                 int idx = (se / 2) * 2;
3539
3540                 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
3541                         raster_config_se &= ~SE_MAP_MASK;
3542
3543                         if (!se_mask[idx]) {
3544                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
3545                         } else {
3546                                 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
3547                         }
3548                 }
3549
3550                 pkr0_mask &= rb_mask;
3551                 pkr1_mask &= rb_mask;
3552                 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
3553                         raster_config_se &= ~PKR_MAP_MASK;
3554
3555                         if (!pkr0_mask) {
3556                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
3557                         } else {
3558                                 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
3559                         }
3560                 }
3561
3562                 if (rb_per_se >= 2) {
3563                         unsigned rb0_mask = 1 << (se * rb_per_se);
3564                         unsigned rb1_mask = rb0_mask << 1;
3565
3566                         rb0_mask &= rb_mask;
3567                         rb1_mask &= rb_mask;
3568                         if (!rb0_mask || !rb1_mask) {
3569                                 raster_config_se &= ~RB_MAP_PKR0_MASK;
3570
3571                                 if (!rb0_mask) {
3572                                         raster_config_se |=
3573                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
3574                                 } else {
3575                                         raster_config_se |=
3576                                                 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
3577                                 }
3578                         }
3579
3580                         if (rb_per_se > 2) {
3581                                 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
3582                                 rb1_mask = rb0_mask << 1;
3583                                 rb0_mask &= rb_mask;
3584                                 rb1_mask &= rb_mask;
3585                                 if (!rb0_mask || !rb1_mask) {
3586                                         raster_config_se &= ~RB_MAP_PKR1_MASK;
3587
3588                                         if (!rb0_mask) {
3589                                                 raster_config_se |=
3590                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
3591                                         } else {
3592                                                 raster_config_se |=
3593                                                         RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
3594                                         }
3595                                 }
3596                         }
3597                 }
3598
3599                 /* GRBM_GFX_INDEX has a different offset on VI */
3600                 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
3601                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
3602                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3603         }
3604
3605         /* GRBM_GFX_INDEX has a different offset on VI */
3606         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3607 }
3608
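/*
 * Build the global bitmap of enabled RBs by walking every SE/SH pair,
 * program the raster configuration (remapped if any RBs are harvested),
 * and cache the per-SE/SH register values for userspace queries.
 */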
3609 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3610 {
3611         int i, j;
3612         u32 data;
3613         u32 raster_config = 0, raster_config_1 = 0;
3614         u32 active_rbs = 0;
3615         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3616                                         adev->gfx.config.max_sh_per_se;
3617         unsigned num_rb_pipes;
3618
3619         mutex_lock(&adev->grbm_idx_mutex);
3620         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3621                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3622                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3623                         data = gfx_v8_0_get_rb_active_bitmap(adev);
3624                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3625                                                rb_bitmap_width_per_sh);
3626                 }
3627         }
3628         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3629
3630         adev->gfx.config.backend_enable_mask = active_rbs;
3631         adev->gfx.config.num_rbs = hweight32(active_rbs);
3632
3633         num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
3634                              adev->gfx.config.max_shader_engines, 16);
3635
3636         gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);
3637
3638         if (!adev->gfx.config.backend_enable_mask ||
3639                         adev->gfx.config.num_rbs >= num_rb_pipes) {
3640                 WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
3641                 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
3642         } else {
3643                 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
3644                                                         adev->gfx.config.backend_enable_mask,
3645                                                         num_rb_pipes);
3646         }
3647
3648         /* cache the values for userspace */
3649         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3650                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3651                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3652                         adev->gfx.config.rb_config[i][j].rb_backend_disable =
3653                                 RREG32(mmCC_RB_BACKEND_DISABLE);
3654                         adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
3655                                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3656                         adev->gfx.config.rb_config[i][j].raster_config =
3657                                 RREG32(mmPA_SC_RASTER_CONFIG);
3658                         adev->gfx.config.rb_config[i][j].raster_config_1 =
3659                                 RREG32(mmPA_SC_RASTER_CONFIG_1);
3660                 }
3661         }
3662         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3663         mutex_unlock(&adev->grbm_idx_mutex);
3664 }
3665
3666 /**
3667  * gfx_v8_0_init_compute_vmid - init the compute VMID SH_MEM registers
3668  *
3669  * @adev: amdgpu_device pointer
3670  *
3671  * Initialize compute vmid sh_mem registers
3672  *
3673  */
3674 #define DEFAULT_SH_MEM_BASES    (0x6000)
3675 #define FIRST_COMPUTE_VMID      (8)
3676 #define LAST_COMPUTE_VMID       (16)
3677 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3678 {
3679         int i;
3680         uint32_t sh_mem_config;
3681         uint32_t sh_mem_bases;
3682
3683         /*
3684          * Configure apertures:
3685          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
3686          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
3687          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
3688          */
3689         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
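        /*
         * SH_MEM_BASES packs two 16-bit aperture bases (private and
         * shared); each half supplies bits 63:48 of a 64-bit address, so
         * placing 0x6000 in both halves yields the 0x60000000'00000000
         * bases in the layout above.
         */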
3690
3691         sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3692                         SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3693                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3694                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3695                         MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3696                         SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3697
3698         mutex_lock(&adev->srbm_mutex);
3699         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3700                 vi_srbm_select(adev, 0, 0, 0, i);
3701                 /* CP and shaders */
3702                 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3703                 WREG32(mmSH_MEM_APE1_BASE, 1);
3704                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3705                 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3706         }
3707         vi_srbm_select(adev, 0, 0, 0, 0);
3708         mutex_unlock(&adev->srbm_mutex);
3709
3710         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
3711          * access. These should be enabled by FW for target VMIDs. */
3712         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3713                 WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
3714                 WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
3715                 WREG32(amdgpu_gds_reg_offset[i].gws, 0);
3716                 WREG32(amdgpu_gds_reg_offset[i].oa, 0);
3717         }
3718 }
3719
3720 static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
3721 {
3722         int vmid;
3723
3724         /*
3725          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
3726          * access. Compute VMIDs should be enabled by FW for target VMIDs,
3727          * the driver can enable them for graphics. VMID0 should maintain
3728          * access so that HWS firmware can save/restore entries.
3729          */
3730         for (vmid = 1; vmid < 16; vmid++) {
3731                 WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
3732                 WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
3733                 WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
3734                 WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
3735         }
3736 }
3737
3738 static void gfx_v8_0_config_init(struct amdgpu_device *adev)
3739 {
3740         switch (adev->asic_type) {
3741         default:
3742                 adev->gfx.config.double_offchip_lds_buf = 1;
3743                 break;
3744         case CHIP_CARRIZO:
3745         case CHIP_STONEY:
3746                 adev->gfx.config.double_offchip_lds_buf = 0;
3747                 break;
3748         }
3749 }
3750
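/*
 * One-time programming of global graphics state: address config, tiling
 * tables, RB and CU setup, the FSA64 SH_MEM apertures for every VMID,
 * and the SC FIFO sizes.
 */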
3751 static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
3752 {
3753         u32 tmp, sh_static_mem_cfg;
3754         int i;
3755
3756         WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
3757         WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3758         WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3759         WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3760
3761         gfx_v8_0_tiling_mode_table_init(adev);
3762         gfx_v8_0_setup_rb(adev);
3763         gfx_v8_0_get_cu_info(adev);
3764         gfx_v8_0_config_init(adev);
3765
3766         /* XXX SH_MEM regs */
3767         /* where to put LDS, scratch, GPUVM in FSA64 space */
3768         sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
3769                                    SWIZZLE_ENABLE, 1);
3770         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3771                                    ELEMENT_SIZE, 1);
3772         sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
3773                                    INDEX_STRIDE, 3);
3774         WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
3775
3776         mutex_lock(&adev->srbm_mutex);
3777         for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
3778                 vi_srbm_select(adev, 0, 0, 0, i);
3779                 /* CP and shaders */
3780                 if (i == 0) {
3781                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3782                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3783                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3784                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3785                         WREG32(mmSH_MEM_CONFIG, tmp);
3786                         WREG32(mmSH_MEM_BASES, 0);
3787                 } else {
3788                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3789                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3790                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3791                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3792                         WREG32(mmSH_MEM_CONFIG, tmp);
3793                         tmp = adev->gmc.shared_aperture_start >> 48;
3794                         WREG32(mmSH_MEM_BASES, tmp);
3795                 }
3796
3797                 WREG32(mmSH_MEM_APE1_BASE, 1);
3798                 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3799         }
3800         vi_srbm_select(adev, 0, 0, 0, 0);
3801         mutex_unlock(&adev->srbm_mutex);
3802
3803         gfx_v8_0_init_compute_vmid(adev);
3804         gfx_v8_0_init_gds_vmid(adev);
3805
3806         mutex_lock(&adev->grbm_idx_mutex);
3807         /*
3808          * make sure that the following register writes are broadcast
3809          * to all the shaders
3810          */
3811         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3812
3813         WREG32(mmPA_SC_FIFO_SIZE,
3814                    (adev->gfx.config.sc_prim_fifo_size_frontend <<
3815                         PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3816                    (adev->gfx.config.sc_prim_fifo_size_backend <<
3817                         PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3818                    (adev->gfx.config.sc_hiz_tile_fifo_size <<
3819                         PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3820                    (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3821                         PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3822
3823         tmp = RREG32(mmSPI_ARB_PRIORITY);
3824         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
3825         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
3826         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
3827         tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
3828         WREG32(mmSPI_ARB_PRIORITY, tmp);
3829
3830         mutex_unlock(&adev->grbm_idx_mutex);
3831
3832 }
3833
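/*
 * Poll each SE/SH until the RLC serdes CU master reports idle, then
 * wait for the non-CU masters (SE/GC/TC0/TC1) as well, giving up after
 * the usual usec timeout.
 */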
3834 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3835 {
3836         u32 i, j, k;
3837         u32 mask;
3838
3839         mutex_lock(&adev->grbm_idx_mutex);
3840         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3841                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3842                         gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3843                         for (k = 0; k < adev->usec_timeout; k++) {
3844                                 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3845                                         break;
3846                                 udelay(1);
3847                         }
3848                         if (k == adev->usec_timeout) {
3849                                 gfx_v8_0_select_se_sh(adev, 0xffffffff,
3850                                                       0xffffffff, 0xffffffff);
3851                                 mutex_unlock(&adev->grbm_idx_mutex);
3852                                 DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
3853                                          i, j);
3854                                 return;
3855                         }
3856                 }
3857         }
3858         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3859         mutex_unlock(&adev->grbm_idx_mutex);
3860
3861         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3862                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3863                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3864                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3865         for (k = 0; k < adev->usec_timeout; k++) {
3866                 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3867                         break;
3868                 udelay(1);
3869         }
3870 }
3871
3872 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3873                                                bool enable)
3874 {
3875         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3876
3877         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3878         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3879         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3880         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3881
3882         WREG32(mmCP_INT_CNTL_RING0, tmp);
3883 }
3884
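/*
 * Hand the clear-state indirect buffer (CSIB) to the RLC: fill it via
 * get_csb_buffer, then program its GPU address and length.
 */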
3885 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3886 {
3887         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
3888         /* csib */
3889         WREG32(mmRLC_CSIB_ADDR_HI,
3890                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
3891         WREG32(mmRLC_CSIB_ADDR_LO,
3892                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3893         WREG32(mmRLC_CSIB_LENGTH,
3894                         adev->gfx.rlc.clear_state_size);
3895 }
3896
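/*
 * Walk the RLC indirect register list, whose entries are delimited by
 * 0xFFFFFFFF markers.  Record where each entry starts, collect the set
 * of unique index-register values it references, and rewrite each value
 * in place with its position in @unique_indices.
 */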
3897 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3898                                 int ind_offset,
3899                                 int list_size,
3900                                 int *unique_indices,
3901                                 int *indices_count,
3902                                 int max_indices,
3903                                 int *ind_start_offsets,
3904                                 int *offset_count,
3905                                 int max_offset)
3906 {
3907         int indices;
3908         bool new_entry = true;
3909
3910         for (; ind_offset < list_size; ind_offset++) {
3911
3912                 if (new_entry) {
3913                         new_entry = false;
3914                         ind_start_offsets[*offset_count] = ind_offset;
3915                         *offset_count = *offset_count + 1;
3916                         BUG_ON(*offset_count >= max_offset);
3917                 }
3918
3919                 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3920                         new_entry = true;
3921                         continue;
3922                 }
3923
3924                 ind_offset += 2;
3925
3926                 /* look for a matching index */
3927                 for (indices = 0;
3928                         indices < *indices_count;
3929                         indices++) {
3930                         if (unique_indices[indices] ==
3931                                 register_list_format[ind_offset])
3932                                 break;
3933                 }
3934
3935                 if (indices >= *indices_count) {
3936                         unique_indices[*indices_count] =
3937                                 register_list_format[ind_offset];
3938                         indices = *indices_count;
3939                         *indices_count = *indices_count + 1;
3940                         BUG_ON(*indices_count >= max_indices);
3941                 }
3942
3943                 register_list_format[ind_offset] = indices;
3944         }
3945 }
3946
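/*
 * Program the RLC save/restore machine: upload the direct restore list
 * to ARAM, the index-compacted indirect list plus its starting offsets
 * to GPM scratch, and latch each unique index register into the
 * RLC_SRM_INDEX_CNTL_ADDR/DATA register pairs.
 */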
3947 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3948 {
3949         int i, temp, data;
3950         int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3951         int indices_count = 0;
3952         int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3953         int offset_count = 0;
3954
3955         int list_size;
3956         unsigned int *register_list_format =
3957                 kmemdup(adev->gfx.rlc.register_list_format,
3958                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3959         if (!register_list_format)
3960                 return -ENOMEM;
3961
3962         gfx_v8_0_parse_ind_reg_list(register_list_format,
3963                                 RLC_FormatDirectRegListLength,
3964                                 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3965                                 unique_indices,
3966                                 &indices_count,
3967                                 ARRAY_SIZE(unique_indices),
3968                                 indirect_start_offsets,
3969                                 &offset_count,
3970                                 ARRAY_SIZE(indirect_start_offsets));
3971
3972         /* save and restore list */
3973         WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);
3974
3975         WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3976         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3977                 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3978
3979         /* indirect list */
3980         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3981         for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3982                 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3983
3984         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3985         list_size = list_size >> 1;
3986         WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3987         WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3988
3989         /* starting offsets */
3990         WREG32(mmRLC_GPM_SCRATCH_ADDR,
3991                 adev->gfx.rlc.starting_offsets_start);
3992         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
3993                 WREG32(mmRLC_GPM_SCRATCH_DATA,
3994                                 indirect_start_offsets[i]);
3995
3996         /* unique indices */
3997         temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3998         data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3999         for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
4000                 if (unique_indices[i] != 0) {
4001                         WREG32(temp + i, unique_indices[i] & 0x3FFFF);
4002                         WREG32(data + i, unique_indices[i] >> 20);
4003                 }
4004         }
4005         kfree(register_list_format);
4006
4007         return 0;
4008 }
4009
4010 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
4011 {
4012         WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
4013 }
4014
4015 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
4016 {
4017         uint32_t data;
4018
4019         WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);
4020
4021         data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
4022         data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
4023         data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
4024         data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
4025         WREG32(mmRLC_PG_DELAY, data);
4026
4027         WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
4028         WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
4029
4030 }
4031
4032 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
4033                                                 bool enable)
4034 {
4035         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
4036 }
4037
4038 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
4039                                                   bool enable)
4040 {
4041         WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
4042 }
4043
4044 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
4045 {
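        /* CP_PG_DISABLE is an active-high disable bit, hence the inversion */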
4046         WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
4047 }
4048
4049 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4050 {
4051         if ((adev->asic_type == CHIP_CARRIZO) ||
4052             (adev->asic_type == CHIP_STONEY)) {
4053                 gfx_v8_0_init_csb(adev);
4054                 gfx_v8_0_init_save_restore_list(adev);
4055                 gfx_v8_0_enable_save_restore_machine(adev);
4056                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4057                 gfx_v8_0_init_power_gating(adev);
4058                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4059         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4060                    (adev->asic_type == CHIP_POLARIS12) ||
4061                    (adev->asic_type == CHIP_VEGAM)) {
4062                 gfx_v8_0_init_csb(adev);
4063                 gfx_v8_0_init_save_restore_list(adev);
4064                 gfx_v8_0_enable_save_restore_machine(adev);
4065                 gfx_v8_0_init_power_gating(adev);
4066         }
4067
4068 }
4069
4070 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
4071 {
4072         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);
4073
4074         gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4075         gfx_v8_0_wait_for_rlc_serdes(adev);
4076 }
4077
4078 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
4079 {
4080         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4081         udelay(50);
4082
4083         WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
4084         udelay(50);
4085 }
4086
4087 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
4088 {
4089         WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);
4090
4091         /* on APUs (e.g. carrizo) the cp interrupt is enabled only after the cp is initialized */
4092         if (!(adev->flags & AMD_IS_APU))
4093                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4094
4095         udelay(50);
4096 }
4097
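/*
 * (Re)start the RLC.  Under SR-IOV the RLC is managed by the host, so
 * the guest only refreshes the clear-state buffer pointer here.
 */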
4098 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
4099 {
4100         if (amdgpu_sriov_vf(adev)) {
4101                 gfx_v8_0_init_csb(adev);
4102                 return 0;
4103         }
4104
4105         adev->gfx.rlc.funcs->stop(adev);
4106         adev->gfx.rlc.funcs->reset(adev);
4107         gfx_v8_0_init_pg(adev);
4108         adev->gfx.rlc.funcs->start(adev);
4109
4110         return 0;
4111 }
4112
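/**
 * gfx_v8_0_cp_gfx_enable - halt or un-halt the gfx micro engines
 *
 * @adev: amdgpu_device pointer
 * @enable: true to un-halt ME/PFP/CE, false to halt them
 *
 * When halting, the gfx ring schedulers are also marked not ready.
 */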
4113 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4114 {
4115         int i;
4116         u32 tmp = RREG32(mmCP_ME_CNTL);
4117
4118         if (enable) {
4119                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4120                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4121                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4122         } else {
4123                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4124                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4125                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4126                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4127                         adev->gfx.gfx_ring[i].sched.ready = false;
4128         }
4129         WREG32(mmCP_ME_CNTL, tmp);
4130         udelay(50);
4131 }
4132
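/**
 * gfx_v8_0_get_csb_size - compute the clear state buffer size in dwords
 *
 * @adev: amdgpu_device pointer
 *
 * The count mirrors the packets emitted by gfx_v8_0_cp_gfx_start():
 * 2 dwords for the PREAMBLE begin, 3 for CONTEXT_CONTROL, 2 + reg_count
 * per SECT_CONTEXT extent, 4 for the raster config pair, 2 for the
 * PREAMBLE end and 2 for CLEAR_STATE.
 */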
4133 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4134 {
4135         u32 count = 0;
4136         const struct cs_section_def *sect = NULL;
4137         const struct cs_extent_def *ext = NULL;
4138
4139         /* begin clear state */
4140         count += 2;
4141         /* context control state */
4142         count += 3;
4143
4144         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4145                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4146                         if (sect->id == SECT_CONTEXT)
4147                                 count += 2 + ext->reg_count;
4148                         else
4149                                 return 0;
4150                 }
4151         }
4152         /* pa_sc_raster_config/pa_sc_raster_config1 */
4153         count += 4;
4154         /* end clear state */
4155         count += 2;
4156         /* clear state */
4157         count += 2;
4158
4159         return count;
4160 }
4161
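/**
 * gfx_v8_0_cp_gfx_start - initialize the gfx CP
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the CP init registers, un-halts the micro engines and emits
 * the clear state buffer plus the CE partition setup on the gfx ring.
 * Returns 0 on success or a negative error code if the ring allocation
 * fails.
 */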
4162 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4163 {
4164         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4165         const struct cs_section_def *sect = NULL;
4166         const struct cs_extent_def *ext = NULL;
4167         int r, i;
4168
4169         /* init the CP */
4170         WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4171         WREG32(mmCP_ENDIAN_SWAP, 0);
4172         WREG32(mmCP_DEVICE_ID, 1);
4173
4174         gfx_v8_0_cp_gfx_enable(adev, true);
4175
4176         r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4177         if (r) {
4178                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4179                 return r;
4180         }
4181
4182         /* clear state buffer */
4183         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4184         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4185
4186         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4187         amdgpu_ring_write(ring, 0x80000000);
4188         amdgpu_ring_write(ring, 0x80000000);
4189
4190         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4191                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4192                         if (sect->id == SECT_CONTEXT) {
4193                                 amdgpu_ring_write(ring,
4194                                        PACKET3(PACKET3_SET_CONTEXT_REG,
4195                                                ext->reg_count));
4196                                 amdgpu_ring_write(ring,
4197                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4198                                 for (i = 0; i < ext->reg_count; i++)
4199                                         amdgpu_ring_write(ring, ext->extent[i]);
4200                         }
4201                 }
4202         }
4203
4204         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4205         amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4206         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
4207         amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);
4208
4209         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4210         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4211
4212         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4213         amdgpu_ring_write(ring, 0);
4214
4215         /* init the CE partitions */
4216         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4217         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4218         amdgpu_ring_write(ring, 0x8000);
4219         amdgpu_ring_write(ring, 0x8000);
4220
4221         amdgpu_ring_commit(ring);
4222
4223         return 0;
4224 }
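
/**
 * gfx_v8_0_set_cpg_door_bell - configure the gfx ring doorbell
 *
 * @adev: amdgpu_device pointer
 * @ring: gfx ring to configure
 *
 * Programs CP_RB_DOORBELL_CONTROL according to ring->use_doorbell and,
 * on dGPUs, the doorbell range registers. Topaz has no gfx doorbells.
 */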
4225 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
4226 {
4227         u32 tmp;
4228         /* no gfx doorbells on iceland */
4229         if (adev->asic_type == CHIP_TOPAZ)
4230                 return;
4231
4232         tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4233
4234         if (ring->use_doorbell) {
4235                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4236                                 DOORBELL_OFFSET, ring->doorbell_index);
4237                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4238                                                 DOORBELL_HIT, 0);
4239                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4240                                             DOORBELL_EN, 1);
4241         } else {
4242                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
4243         }
4244
4245         WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4246
4247         if (adev->flags & AMD_IS_APU)
4248                 return;
4249
4250         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4251                                         DOORBELL_RANGE_LOWER,
4252                                         adev->doorbell_index.gfx_ring0);
4253         WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4254
4255         WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4256                 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4257 }
4258
4259 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4260 {
4261         struct amdgpu_ring *ring;
4262         u32 tmp;
4263         u32 rb_bufsz;
4264         u64 rb_addr, rptr_addr, wptr_gpu_addr;
4265
4266         /* Set the write pointer delay */
4267         WREG32(mmCP_RB_WPTR_DELAY, 0);
4268
4269         /* set the RB to use vmid 0 */
4270         WREG32(mmCP_RB_VMID, 0);
4271
4272         /* Set ring buffer size */
4273         ring = &adev->gfx.gfx_ring[0];
4274         rb_bufsz = order_base_2(ring->ring_size / 8);
4275         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4276         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4277         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4278         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4279 #ifdef __BIG_ENDIAN
4280         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4281 #endif
4282         WREG32(mmCP_RB0_CNTL, tmp);
4283
4284         /* Initialize the ring buffer's read and write pointers */
4285         WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4286         ring->wptr = 0;
4287         WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4288
4289         /* set the wb address whether it's enabled or not */
4290         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4291         WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4292         WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4293
4294         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4295         WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
4296         WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
4297         mdelay(1);
4298         WREG32(mmCP_RB0_CNTL, tmp);
4299
4300         rb_addr = ring->gpu_addr >> 8;
4301         WREG32(mmCP_RB0_BASE, rb_addr);
4302         WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4303
4304         gfx_v8_0_set_cpg_door_bell(adev, ring);
4305         /* start the ring */
4306         amdgpu_ring_clear_ring(ring);
4307         gfx_v8_0_cp_gfx_start(adev);
4308         ring->sched.ready = true;
4309
4310         return 0;
4311 }
4312
4313 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4314 {
4315         int i;
4316
4317         if (enable) {
4318                 WREG32(mmCP_MEC_CNTL, 0);
4319         } else {
4320                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4321                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4322                         adev->gfx.compute_ring[i].sched.ready = false;
4323                 adev->gfx.kiq.ring.sched.ready = false;
4324         }
4325         udelay(50);
4326 }
4327
4328 /* KIQ functions */
4329 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4330 {
4331         uint32_t tmp;
4332         struct amdgpu_device *adev = ring->adev;
4333
4334         /* tell RLC which queue is the KIQ */
4335         tmp = RREG32(mmRLC_CP_SCHEDULERS);
4336         tmp &= 0xffffff00;
4337         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4338         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4339         tmp |= 0x80;
4340         WREG32(mmRLC_CP_SCHEDULERS, tmp);
4341 }
4342
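/**
 * gfx_v8_0_kiq_kcq_enable - map the compute queues through the KIQ
 *
 * @adev: amdgpu_device pointer
 *
 * Builds a bitmask of the enabled MEC queues, then submits one
 * SET_RESOURCES packet followed by a MAP_QUEUES packet per compute
 * ring on the KIQ ring.
 */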
4343 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
4344 {
4345         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4346         uint64_t queue_mask = 0;
4347         int r, i;
4348
4349         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
4350                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
4351                         continue;
4352
4353                 /* This situation may be hit in the future if a new HW
4354                  * generation exposes more than 64 queues. If so, the
4355                  * definition of queue_mask needs updating */
4356                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
4357                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
4358                         break;
4359                 }
4360
4361                 queue_mask |= (1ull << i);
4362         }
4363
4364         r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
4365         if (r) {
4366                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4367                 return r;
4368         }
4369         /* set resources */
4370         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4371         amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4372         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
4373         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
4374         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
4375         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
4376         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
4377         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
4378         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4379                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4380                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4381                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4382
4383                 /* map queues */
4384                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4385                 /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
4386                 amdgpu_ring_write(kiq_ring,
4387                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1));
4388                 amdgpu_ring_write(kiq_ring,
4389                                   PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
4390                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
4391                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
4392                                   PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
4393                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4394                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4395                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4396                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4397         }
4398
4399         amdgpu_ring_commit(kiq_ring);
4400
4401         return 0;
4402 }
4403
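/**
 * gfx_v8_0_deactivate_hqd - dequeue the currently selected HQD
 *
 * @adev: amdgpu_device pointer
 * @req: dequeue request type written to CP_HQD_DEQUEUE_REQUEST
 *
 * Polls CP_HQD_ACTIVE for up to adev->usec_timeout microseconds and
 * returns -ETIMEDOUT if the queue fails to drain. The caller must
 * have selected the target queue via vi_srbm_select().
 */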
4404 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
4405 {
4406         int i, r = 0;
4407
4408         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
4409                 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
4410                 for (i = 0; i < adev->usec_timeout; i++) {
4411                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
4412                                 break;
4413                         udelay(1);
4414                 }
4415                 if (i == adev->usec_timeout)
4416                         r = -ETIMEDOUT;
4417         }
4418         WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
4419         WREG32(mmCP_HQD_PQ_RPTR, 0);
4420         WREG32(mmCP_HQD_PQ_WPTR, 0);
4421
4422         return r;
4423 }
4424
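/**
 * gfx_v8_0_mqd_init - initialize the memory queue descriptor
 *
 * @ring: compute or KIQ ring backing the MQD
 *
 * Fills the vi_mqd structure with the EOP buffer address, ring buffer
 * address and size, doorbell and write-back addresses and the MTYPE
 * settings, so that gfx_v8_0_mqd_commit() can later program the HQD
 * registers from it.
 */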
4425 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4426 {
4427         struct amdgpu_device *adev = ring->adev;
4428         struct vi_mqd *mqd = ring->mqd_ptr;
4429         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4430         uint32_t tmp;
4431
4432         mqd->header = 0xC0310800;
4433         mqd->compute_pipelinestat_enable = 0x00000001;
4434         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4435         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4436         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4437         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4438         mqd->compute_misc_reserved = 0x00000003;
4439         mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4440                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4441         mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4442                                                      + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
4443         eop_base_addr = ring->eop_gpu_addr >> 8;
4444         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4445         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4446
4447         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
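        /* e.g. GFX8_MEC_HPD_SIZE = 4096 bytes = 1024 dwords, so
         * EOP_SIZE = order_base_2(1024) - 1 = 9 and the hardware sees
         * 2^(9+1) = 1024 dwords, matching the allocated EOP buffer.
         */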
4448         tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4449         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4450                         (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
4451
4452         mqd->cp_hqd_eop_control = tmp;
4453
4454         /* enable doorbell? */
4455         tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
4456                             CP_HQD_PQ_DOORBELL_CONTROL,
4457                             DOORBELL_EN,
4458                             ring->use_doorbell ? 1 : 0);
4459
4460         mqd->cp_hqd_pq_doorbell_control = tmp;
4461
4462         /* set the pointer to the MQD */
4463         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
4464         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
4465
4466         /* set MQD vmid to 0 */
4467         tmp = RREG32(mmCP_MQD_CONTROL);
4468         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4469         mqd->cp_mqd_control = tmp;
4470
4471         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4472         hqd_gpu_addr = ring->gpu_addr >> 8;
4473         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4474         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4475
4476         /* set up the HQD, this is similar to CP_RB0_CNTL */
4477         tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4478         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4479                             (order_base_2(ring->ring_size / 4) - 1));
4480         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4481                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4482 #ifdef __BIG_ENDIAN
4483         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4484 #endif
4485         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4486         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4487         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4488         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4489         mqd->cp_hqd_pq_control = tmp;
4490
4491         /* set the wb address whether it's enabled or not */
4492         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4493         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4494         mqd->cp_hqd_pq_rptr_report_addr_hi =
4495                 upper_32_bits(wb_gpu_addr) & 0xffff;
4496
4497         /* only used if CP_PQ_WPTR_POLL_CNTL.EN = 1 */
4498         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4499         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4500         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4501
4502         tmp = 0;
4503         /* enable the doorbell if requested */
4504         if (ring->use_doorbell) {
4505                 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4506                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4507                                 DOORBELL_OFFSET, ring->doorbell_index);
4508
4509                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4510                                          DOORBELL_EN, 1);
4511                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4512                                          DOORBELL_SOURCE, 0);
4513                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4514                                          DOORBELL_HIT, 0);
4515         }
4516
4517         mqd->cp_hqd_pq_doorbell_control = tmp;
4518
4519         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4520         ring->wptr = 0;
4521         mqd->cp_hqd_pq_wptr = ring->wptr;
4522         mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4523
4524         /* set the vmid for the queue */
4525         mqd->cp_hqd_vmid = 0;
4526
4527         tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4528         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4529         mqd->cp_hqd_persistent_state = tmp;
4530
4531         /* set MTYPE */
4532         tmp = RREG32(mmCP_HQD_IB_CONTROL);
4533         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
4534         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
4535         mqd->cp_hqd_ib_control = tmp;
4536
4537         tmp = RREG32(mmCP_HQD_IQ_TIMER);
4538         tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
4539         mqd->cp_hqd_iq_timer = tmp;
4540
4541         tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
4542         tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
4543         mqd->cp_hqd_ctx_save_control = tmp;
4544
4545         /* defaults */
4546         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
4547         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
4548         mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
4549         mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
4550         mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
4551         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
4552         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
4553         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
4554         mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
4555         mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
4556         mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
4557         mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
4558         mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
4559         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
4560         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
4561
4562         /* activate the queue */
4563         mqd->cp_hqd_active = 1;
4564
4565         return 0;
4566 }
4567
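/**
 * gfx_v8_0_mqd_commit - program the HQD registers from an MQD
 *
 * @adev: amdgpu_device pointer
 * @mqd: queue descriptor to commit
 *
 * The caller must hold adev->srbm_mutex and have selected the target
 * me/pipe/queue via vi_srbm_select().
 */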
4568 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
4569                         struct vi_mqd *mqd)
4570 {
4571         uint32_t mqd_reg;
4572         uint32_t *mqd_data;
4573
4574         /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
4575         mqd_data = &mqd->cp_mqd_base_addr_lo;
4576
4577         /* disable wptr polling */
4578         WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
4579
4580         /* program all HQD registers */
4581         for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
4582                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4583
4584         /* Tonga errata: EOP RPTR/WPTR should be left unmodified.
4585          * This is safe since EOP RPTR==WPTR for any inactive HQD
4586          * on ASICs that do not support context-save.
4587          * EOP writes/reads can start anywhere in the ring.
4588          */
4589         if (adev->asic_type != CHIP_TONGA) {
4590                 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
4591                 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
4592                 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
4593         }
4594
4595         for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
4596                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4597
4598         /* activate the HQD */
4599         for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
4600                 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
4601
4602         return 0;
4603 }
4604
4605 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4606 {
4607         struct amdgpu_device *adev = ring->adev;
4608         struct vi_mqd *mqd = ring->mqd_ptr;
4609         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
4610
4611         gfx_v8_0_kiq_setting(ring);
4612
4613         if (adev->in_gpu_reset) { /* for GPU_RESET case */
4614                 /* reset MQD to a clean status */
4615                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4616                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4617
4618                 /* reset ring buffer */
4619                 ring->wptr = 0;
4620                 amdgpu_ring_clear_ring(ring);
4621                 mutex_lock(&adev->srbm_mutex);
4622                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4623                 gfx_v8_0_mqd_commit(adev, mqd);
4624                 vi_srbm_select(adev, 0, 0, 0, 0);
4625                 mutex_unlock(&adev->srbm_mutex);
4626         } else {
4627                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4628                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4629                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4630                 mutex_lock(&adev->srbm_mutex);
4631                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4632                 gfx_v8_0_mqd_init(ring);
4633                 gfx_v8_0_mqd_commit(adev, mqd);
4634                 vi_srbm_select(adev, 0, 0, 0, 0);
4635                 mutex_unlock(&adev->srbm_mutex);
4636
4637                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4638                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4639         }
4640
4641         return 0;
4642 }
4643
4644 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4645 {
4646         struct amdgpu_device *adev = ring->adev;
4647         struct vi_mqd *mqd = ring->mqd_ptr;
4648         int mqd_idx = ring - &adev->gfx.compute_ring[0];
4649
4650         if (!adev->in_gpu_reset && !adev->in_suspend) {
4651                 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4652                 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
4653                 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
4654                 mutex_lock(&adev->srbm_mutex);
4655                 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4656                 gfx_v8_0_mqd_init(ring);
4657                 vi_srbm_select(adev, 0, 0, 0, 0);
4658                 mutex_unlock(&adev->srbm_mutex);
4659
4660                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4661                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4662         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
4663                 /* reset MQD to a clean status */
4664                 if (adev->gfx.mec.mqd_backup[mqd_idx])
4665                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4666                 /* reset ring buffer */
4667                 ring->wptr = 0;
4668                 amdgpu_ring_clear_ring(ring);
4669         } else {
4670                 amdgpu_ring_clear_ring(ring);
4671         }
4672         return 0;
4673 }
4674
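/**
 * gfx_v8_0_set_mec_doorbell_range - program the MEC doorbell aperture
 *
 * @adev: amdgpu_device pointer
 *
 * Only ASICs newer than Tonga have the range registers; the doorbell
 * enable bit in CP_PQ_STATUS is set unconditionally.
 */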
4675 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev)
4676 {
4677         if (adev->asic_type > CHIP_TONGA) {
4678                 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2);
4679                 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2);
4680         }
4681         /* enable doorbells */
4682         WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4683 }
4684
4685 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4686 {
4687         struct amdgpu_ring *ring;
4688         int r;
4689
4690         ring = &adev->gfx.kiq.ring;
4691
4692         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4693         if (unlikely(r != 0))
4694                 return r;
4695
4696         r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4697         if (unlikely(r != 0)) {
                /* drop the reservation taken above before bailing out */
                amdgpu_bo_unreserve(ring->mqd_obj);
                return r;
        }
4699
4700         gfx_v8_0_kiq_init_queue(ring);
4701         amdgpu_bo_kunmap(ring->mqd_obj);
4702         ring->mqd_ptr = NULL;
4703         amdgpu_bo_unreserve(ring->mqd_obj);
4704         ring->sched.ready = true;
4705         return 0;
4706 }
4707
4708 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
4709 {
4710         struct amdgpu_ring *ring = NULL;
4711         int r = 0, i;
4712
4713         gfx_v8_0_cp_compute_enable(adev, true);
4714
4715         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4716                 ring = &adev->gfx.compute_ring[i];
4717
4718                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4719                 if (unlikely(r != 0))
4720                         goto done;
4721                 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
4722                 if (!r) {
4723                         r = gfx_v8_0_kcq_init_queue(ring);
4724                         amdgpu_bo_kunmap(ring->mqd_obj);
4725                         ring->mqd_ptr = NULL;
4726                 }
4727                 amdgpu_bo_unreserve(ring->mqd_obj);
4728                 if (r)
4729                         goto done;
4730         }
4731
4732         gfx_v8_0_set_mec_doorbell_range(adev);
4733
4734         r = gfx_v8_0_kiq_kcq_enable(adev);
4735         if (r)
4736                 goto done;
4737
4738 done:
4739         return r;
4740 }
4741
4742 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
4743 {
4744         int r, i;
4745         struct amdgpu_ring *ring;
4746
4747         /* collect all the ring_tests here, gfx, kiq, compute */
4748         ring = &adev->gfx.gfx_ring[0];
4749         r = amdgpu_ring_test_helper(ring);
4750         if (r)
4751                 return r;
4752
4753         ring = &adev->gfx.kiq.ring;
4754         r = amdgpu_ring_test_helper(ring);
4755         if (r)
4756                 return r;
4757
4758         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4759                 ring = &adev->gfx.compute_ring[i];
4760                 amdgpu_ring_test_helper(ring);
4761         }
4762
4763         return 0;
4764 }
4765
4766 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4767 {
4768         int r;
4769
4770         if (!(adev->flags & AMD_IS_APU))
4771                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4772
4773         r = gfx_v8_0_kiq_resume(adev);
4774         if (r)
4775                 return r;
4776
4777         r = gfx_v8_0_cp_gfx_resume(adev);
4778         if (r)
4779                 return r;
4780
4781         r = gfx_v8_0_kcq_resume(adev);
4782         if (r)
4783                 return r;
4784
4785         r = gfx_v8_0_cp_test_all_rings(adev);
4786         if (r)
4787                 return r;
4788
4789         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4790
4791         return 0;
4792 }
4793
4794 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4795 {
4796         gfx_v8_0_cp_gfx_enable(adev, enable);
4797         gfx_v8_0_cp_compute_enable(adev, enable);
4798 }
4799
4800 static int gfx_v8_0_hw_init(void *handle)
4801 {
4802         int r;
4803         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4804
4805         gfx_v8_0_init_golden_registers(adev);
4806         gfx_v8_0_constants_init(adev);
4807
4808         r = adev->gfx.rlc.funcs->resume(adev);
4809         if (r)
4810                 return r;
4811
4812         r = gfx_v8_0_cp_resume(adev);
4813
4814         return r;
4815 }
4816
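/**
 * gfx_v8_0_kcq_disable - unmap the compute queues through the KIQ
 *
 * @adev: amdgpu_device pointer
 *
 * Emits one UNMAP_QUEUES packet with the RESET_QUEUES action per
 * compute ring, then ring-tests the KIQ to make sure the packets
 * were consumed.
 */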
4817 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
4818 {
4819         int r, i;
4820         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
4821
4822         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
4823         if (r)
4824                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
4825
4826         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4827                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4828
4829                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
4830                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
4831                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
4832                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
4833                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
4834                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
4835                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
4836                 amdgpu_ring_write(kiq_ring, 0);
4837                 amdgpu_ring_write(kiq_ring, 0);
4838                 amdgpu_ring_write(kiq_ring, 0);
4839         }
4840         r = amdgpu_ring_test_helper(kiq_ring);
4841         if (r)
4842                 DRM_ERROR("KCQ disable failed\n");
4843
4844         return r;
4845 }
4846
4847 static bool gfx_v8_0_is_idle(void *handle)
4848 {
4849         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4850
4851         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
4852                 || RREG32(mmGRBM_STATUS2) != 0x8)
4853                 return false;
4854         else
4855                 return true;
4856 }
4857
4858 static bool gfx_v8_0_rlc_is_idle(void *handle)
4859 {
4860         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4861
4862         if (RREG32(mmGRBM_STATUS2) != 0x8)
4863                 return false;
4864         else
4865                 return true;
4866 }
4867
4868 static int gfx_v8_0_wait_for_rlc_idle(void *handle)
4869 {
4870         unsigned int i;
4871         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4872
4873         for (i = 0; i < adev->usec_timeout; i++) {
4874                 if (gfx_v8_0_rlc_is_idle(handle))
4875                         return 0;
4876
4877                 udelay(1);
4878         }
4879         return -ETIMEDOUT;
4880 }
4881
4882 static int gfx_v8_0_wait_for_idle(void *handle)
4883 {
4884         unsigned int i;
4885         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4886
4887         for (i = 0; i < adev->usec_timeout; i++) {
4888                 if (gfx_v8_0_is_idle(handle))
4889                         return 0;
4890
4891                 udelay(1);
4892         }
4893         return -ETIMEDOUT;
4894 }
4895
4896 static int gfx_v8_0_hw_fini(void *handle)
4897 {
4898         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4899
4900         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4901         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4902
4903         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4904
4905         amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);
4906
4907         /* disable the KCQs so the CPC stops touching memory that is no longer valid */
4908         gfx_v8_0_kcq_disable(adev);
4909
4910         if (amdgpu_sriov_vf(adev)) {
4911                 pr_debug("For SRIOV client, skipping CP/RLC halt.\n");
4912                 return 0;
4913         }
4914         amdgpu_gfx_rlc_enter_safe_mode(adev);
4915         if (!gfx_v8_0_wait_for_idle(adev))
4916                 gfx_v8_0_cp_enable(adev, false);
4917         else
4918                 pr_err("cp is busy, skip halt cp\n");
4919         if (!gfx_v8_0_wait_for_rlc_idle(adev))
4920                 adev->gfx.rlc.funcs->stop(adev);
4921         else
4922                 pr_err("rlc is busy, skip halt rlc\n");
4923         amdgpu_gfx_rlc_exit_safe_mode(adev);
4924
4925         return 0;
4926 }
4927
4928 static int gfx_v8_0_suspend(void *handle)
4929 {
4930         return gfx_v8_0_hw_fini(handle);
4931 }
4932
4933 static int gfx_v8_0_resume(void *handle)
4934 {
4935         return gfx_v8_0_hw_init(handle);
4936 }
4937
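/**
 * gfx_v8_0_check_soft_reset - check which blocks need a soft reset
 *
 * @handle: amdgpu_device pointer cast to void
 *
 * Inspects GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS and caches the
 * required GRBM/SRBM soft reset masks in adev->gfx for the pre/post
 * soft reset callbacks. Returns true if any reset is needed.
 */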
4938 static bool gfx_v8_0_check_soft_reset(void *handle)
4939 {
4940         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4941         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4942         u32 tmp;
4943
4944         /* GRBM_STATUS */
4945         tmp = RREG32(mmGRBM_STATUS);
4946         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4947                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4948                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4949                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4950                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4951                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
4952                    GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4953                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4954                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4955                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4956                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4957                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4958                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4959         }
4960
4961         /* GRBM_STATUS2 */
4962         tmp = RREG32(mmGRBM_STATUS2);
4963         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4964                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4965                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4966
4967         if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
4968             REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
4969             REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
4970                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4971                                                 SOFT_RESET_CPF, 1);
4972                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4973                                                 SOFT_RESET_CPC, 1);
4974                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
4975                                                 SOFT_RESET_CPG, 1);
4976                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
4977                                                 SOFT_RESET_GRBM, 1);
4978         }
4979
4980         /* SRBM_STATUS */
4981         tmp = RREG32(mmSRBM_STATUS);
4982         if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
4983                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4984                                                 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
4985         if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
4986                 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
4987                                                 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
4988
4989         if (grbm_soft_reset || srbm_soft_reset) {
4990                 adev->gfx.grbm_soft_reset = grbm_soft_reset;
4991                 adev->gfx.srbm_soft_reset = srbm_soft_reset;
4992                 return true;
4993         } else {
4994                 adev->gfx.grbm_soft_reset = 0;
4995                 adev->gfx.srbm_soft_reset = 0;
4996                 return false;
4997         }
4998 }
4999
5000 static int gfx_v8_0_pre_soft_reset(void *handle)
5001 {
5002         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5003         u32 grbm_soft_reset = 0;
5004
5005         if ((!adev->gfx.grbm_soft_reset) &&
5006             (!adev->gfx.srbm_soft_reset))
5007                 return 0;
5008
5009         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5010
5011         /* stop the rlc */
5012         adev->gfx.rlc.funcs->stop(adev);
5013
5014         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5015             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5016                 /* Disable GFX parsing/prefetching */
5017                 gfx_v8_0_cp_gfx_enable(adev, false);
5018
5019         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5020             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5021             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5022             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5023                 int i;
5024
5025                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5026                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5027
5028                         mutex_lock(&adev->srbm_mutex);
5029                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5030                         gfx_v8_0_deactivate_hqd(adev, 2);
5031                         vi_srbm_select(adev, 0, 0, 0, 0);
5032                         mutex_unlock(&adev->srbm_mutex);
5033                 }
5034                 /* Disable MEC parsing/prefetching */
5035                 gfx_v8_0_cp_compute_enable(adev, false);
5036         }
5037
5038         return 0;
5039 }
5040
5041 static int gfx_v8_0_soft_reset(void *handle)
5042 {
5043         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5044         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5045         u32 tmp;
5046
5047         if ((!adev->gfx.grbm_soft_reset) &&
5048             (!adev->gfx.srbm_soft_reset))
5049                 return 0;
5050
5051         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5052         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5053
5054         if (grbm_soft_reset || srbm_soft_reset) {
5055                 tmp = RREG32(mmGMCON_DEBUG);
5056                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5057                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5058                 WREG32(mmGMCON_DEBUG, tmp);
5059                 udelay(50);
5060         }
5061
5062         if (grbm_soft_reset) {
5063                 tmp = RREG32(mmGRBM_SOFT_RESET);
5064                 tmp |= grbm_soft_reset;
5065                 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5066                 WREG32(mmGRBM_SOFT_RESET, tmp);
5067                 tmp = RREG32(mmGRBM_SOFT_RESET);
5068
5069                 udelay(50);
5070
5071                 tmp &= ~grbm_soft_reset;
5072                 WREG32(mmGRBM_SOFT_RESET, tmp);
5073                 tmp = RREG32(mmGRBM_SOFT_RESET);
5074         }
5075
5076         if (srbm_soft_reset) {
5077                 tmp = RREG32(mmSRBM_SOFT_RESET);
5078                 tmp |= srbm_soft_reset;
5079                 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5080                 WREG32(mmSRBM_SOFT_RESET, tmp);
5081                 tmp = RREG32(mmSRBM_SOFT_RESET);
5082
5083                 udelay(50);
5084
5085                 tmp &= ~srbm_soft_reset;
5086                 WREG32(mmSRBM_SOFT_RESET, tmp);
5087                 tmp = RREG32(mmSRBM_SOFT_RESET);
5088         }
5089
5090         if (grbm_soft_reset || srbm_soft_reset) {
5091                 tmp = RREG32(mmGMCON_DEBUG);
5092                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5093                 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5094                 WREG32(mmGMCON_DEBUG, tmp);
5095         }
5096
5097         /* Wait a little for things to settle down */
5098         udelay(50);
5099
5100         return 0;
5101 }
5102
5103 static int gfx_v8_0_post_soft_reset(void *handle)
5104 {
5105         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5106         u32 grbm_soft_reset = 0;
5107
5108         if ((!adev->gfx.grbm_soft_reset) &&
5109             (!adev->gfx.srbm_soft_reset))
5110                 return 0;
5111
5112         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5113
5114         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5115             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5116             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5117             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5118                 int i;
5119
5120                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5121                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5122
5123                         mutex_lock(&adev->srbm_mutex);
5124                         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5125                         gfx_v8_0_deactivate_hqd(adev, 2);
5126                         vi_srbm_select(adev, 0, 0, 0, 0);
5127                         mutex_unlock(&adev->srbm_mutex);
5128                 }
5129                 gfx_v8_0_kiq_resume(adev);
5130                 gfx_v8_0_kcq_resume(adev);
5131         }
5132
5133         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5134             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5135                 gfx_v8_0_cp_gfx_resume(adev);
5136
5137         gfx_v8_0_cp_test_all_rings(adev);
5138
5139         adev->gfx.rlc.funcs->start(adev);
5140
5141         return 0;
5142 }
5143
5144 /**
5145  * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5146  *
5147  * @adev: amdgpu_device pointer
5148  *
5149  * Fetches a GPU clock counter snapshot.
5150  * Returns the 64 bit clock counter snapshot.
5151  */
5152 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5153 {
5154         uint64_t clock;
5155
5156         mutex_lock(&adev->gfx.gpu_clock_mutex);
5157         WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5158         clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5159                 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5160         mutex_unlock(&adev->gfx.gpu_clock_mutex);
5161         return clock;
5162 }
5163
5164 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5165                                           uint32_t vmid,
5166                                           uint32_t gds_base, uint32_t gds_size,
5167                                           uint32_t gws_base, uint32_t gws_size,
5168                                           uint32_t oa_base, uint32_t oa_size)
5169 {
5170         /* GDS Base */
5171         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5172         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5173                                 WRITE_DATA_DST_SEL(0)));
5174         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5175         amdgpu_ring_write(ring, 0);
5176         amdgpu_ring_write(ring, gds_base);
5177
5178         /* GDS Size */
5179         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5180         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5181                                 WRITE_DATA_DST_SEL(0)));
5182         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5183         amdgpu_ring_write(ring, 0);
5184         amdgpu_ring_write(ring, gds_size);
5185
5186         /* GWS */
5187         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5188         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5189                                 WRITE_DATA_DST_SEL(0)));
5190         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5191         amdgpu_ring_write(ring, 0);
5192         amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5193
5194         /* OA */
5195         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5196         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5197                                 WRITE_DATA_DST_SEL(0)));
5198         amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5199         amdgpu_ring_write(ring, 0);
5200         amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5201 }
5202
5203 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5204 {
5205         WREG32(mmSQ_IND_INDEX,
5206                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5207                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5208                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5209                 (SQ_IND_INDEX__FORCE_READ_MASK));
5210         return RREG32(mmSQ_IND_DATA);
5211 }
5212
5213 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5214                            uint32_t wave, uint32_t thread,
5215                            uint32_t regno, uint32_t num, uint32_t *out)
5216 {
5217         WREG32(mmSQ_IND_INDEX,
5218                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5219                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5220                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5221                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5222                 (SQ_IND_INDEX__FORCE_READ_MASK) |
5223                 (SQ_IND_INDEX__AUTO_INCR_MASK));
5224         while (num--)
5225                 *(out++) = RREG32(mmSQ_IND_DATA);
5226 }
5227
5228 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5229 {
5230         /* type 0 wave data */
5231         dst[(*no_fields)++] = 0;
5232         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5233         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5234         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5235         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5236         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5237         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5238         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5239         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5240         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5241         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5242         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5243         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5244         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5245         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5246         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5247         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5248         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5249         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5250 }
5251
5252 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5253                                      uint32_t wave, uint32_t start,
5254                                      uint32_t size, uint32_t *dst)
5255 {
5256         wave_read_regs(
5257                 adev, simd, wave, 0,
5258                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5259 }
5260
5261
5262 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5263         .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5264         .select_se_sh = &gfx_v8_0_select_se_sh,
5265         .read_wave_data = &gfx_v8_0_read_wave_data,
5266         .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5267         .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
5268 };
5269
5270 static int gfx_v8_0_early_init(void *handle)
5271 {
5272         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5273
5274         adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5275         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
5276         adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5277         gfx_v8_0_set_ring_funcs(adev);
5278         gfx_v8_0_set_irq_funcs(adev);
5279         gfx_v8_0_set_gds_init(adev);
5280         gfx_v8_0_set_rlc_funcs(adev);
5281
5282         return 0;
5283 }
5284
5285 static int gfx_v8_0_late_init(void *handle)
5286 {
5287         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5288         int r;
5289
5290         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5291         if (r)
5292                 return r;
5293
5294         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5295         if (r)
5296                 return r;
5297
5298         /* requires IBs so do in late init after IB pool is initialized */
5299         r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5300         if (r)
5301                 return r;
5302
5303         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
5304         if (r) {
5305                 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r);
5306                 return r;
5307         }
5308
5309         r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0);
5310         if (r) {
5311                 DRM_ERROR(
5312                         "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n",
5313                         r);
5314                 return r;
5315         }
5316
5317         return 0;
5318 }
5319
5320 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5321                                                        bool enable)
5322 {
5323         if (((adev->asic_type == CHIP_POLARIS11) ||
5324             (adev->asic_type == CHIP_POLARIS12) ||
5325             (adev->asic_type == CHIP_VEGAM)) &&
5326             adev->powerplay.pp_funcs->set_powergating_by_smu)
5327                 /* Send msg to SMU via Powerplay */
5328                 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable);
5329
5330         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5331 }
5332
5333 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5334                                                         bool enable)
5335 {
5336         WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5337 }
5338
5339 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5340                 bool enable)
5341 {
5342         WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5343 }
5344
5345 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5346                                           bool enable)
5347 {
5348         WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5349 }
5350
5351 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5352                                                 bool enable)
5353 {
5354         WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5355
5356         /* Read any GFX register to wake up GFX. */
5357         if (!enable)
5358                 RREG32(mmDB_RENDER_CONTROL);
5359 }
5360
5361 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5362                                           bool enable)
5363 {
5364         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5365                 cz_enable_gfx_cg_power_gating(adev, true);
5366                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5367                         cz_enable_gfx_pipeline_power_gating(adev, true);
5368         } else {
5369                 cz_enable_gfx_cg_power_gating(adev, false);
5370                 cz_enable_gfx_pipeline_power_gating(adev, false);
5371         }
5372 }
5373
5374 static int gfx_v8_0_set_powergating_state(void *handle,
5375                                           enum amd_powergating_state state)
5376 {
5377         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5378         bool enable = (state == AMD_PG_STATE_GATE);
5379
5380         if (amdgpu_sriov_vf(adev))
5381                 return 0;
5382
5383         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5384                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5385                                 AMD_PG_SUPPORT_CP |
5386                                 AMD_PG_SUPPORT_GFX_DMG))
5387                 amdgpu_gfx_rlc_enter_safe_mode(adev);
5388         switch (adev->asic_type) {
5389         case CHIP_CARRIZO:
5390         case CHIP_STONEY:
5391
5392                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5393                         cz_enable_sck_slow_down_on_power_up(adev, true);
5394                         cz_enable_sck_slow_down_on_power_down(adev, true);
5395                 } else {
5396                         cz_enable_sck_slow_down_on_power_up(adev, false);
5397                         cz_enable_sck_slow_down_on_power_down(adev, false);
5398                 }
5399                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5400                         cz_enable_cp_power_gating(adev, true);
5401                 else
5402                         cz_enable_cp_power_gating(adev, false);
5403
5404                 cz_update_gfx_cg_power_gating(adev, enable);
5405
5406                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5407                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5408                 else
5409                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5410
5411                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5412                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5413                 else
5414                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5415                 break;
5416         case CHIP_POLARIS11:
5417         case CHIP_POLARIS12:
5418         case CHIP_VEGAM:
5419                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5420                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5421                 else
5422                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5423
5424                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5425                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5426                 else
5427                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5428
5429                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5430                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5431                 else
5432                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5433                 break;
5434         default:
5435                 break;
5436         }
5437         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
5438                                 AMD_PG_SUPPORT_RLC_SMU_HS |
5439                                 AMD_PG_SUPPORT_CP |
5440                                 AMD_PG_SUPPORT_GFX_DMG))
5441                 amdgpu_gfx_rlc_exit_safe_mode(adev);
5442         return 0;
5443 }
5444
5445 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
5446 {
5447         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5448         u32 data;
5449
5450         if (amdgpu_sriov_vf(adev))
5451                 *flags = 0;
5452
5453         /* AMD_CG_SUPPORT_GFX_MGCG */
5454         data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5455         if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
5456                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5457
5458         /* AMD_CG_SUPPORT_GFX_CGCG */
5459         data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5460         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5461                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5462
5463         /* AMD_CG_SUPPORT_GFX_CGLS */
5464         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5465                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5466
5467         /* AMD_CG_SUPPORT_GFX_CGTS */
5468         data = RREG32(mmCGTS_SM_CTRL_REG);
5469         if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
5470                 *flags |= AMD_CG_SUPPORT_GFX_CGTS;
5471
5472         /* AMD_CG_SUPPORT_GFX_CGTS_LS */
5473         if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
5474                 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;
5475
5476         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5477         data = RREG32(mmRLC_MEM_SLP_CNTL);
5478         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5479                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5480
5481         /* AMD_CG_SUPPORT_GFX_CP_LS */
5482         data = RREG32(mmCP_MEM_SLP_CNTL);
5483         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5484                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5485 }
5486
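/* Broadcast a BPM command over the RLC SERDES bus: select all SEs/SHs,
 * unmask every CU and non-CU master, then program RLC_SERDES_WR_CTRL with
 * the command, the target BPM register address and BPM address 0xff.
 */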
5487 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5488                                      uint32_t reg_addr, uint32_t cmd)
5489 {
5490         uint32_t data;
5491
5492         gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5493
5494         WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5495         WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5496
5497         data = RREG32(mmRLC_SERDES_WR_CTRL);
5498         if (adev->asic_type == CHIP_STONEY)
5499                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5500                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5501                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5502                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5503                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5504                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5505                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5506                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5507                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5508         else
5509                 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5510                           RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5511                           RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5512                           RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5513                           RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5514                           RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5515                           RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5516                           RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5517                           RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5518                           RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5519                           RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5520         data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5521                  (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5522                  (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5523                  (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5524
5525         WREG32(mmRLC_SERDES_WR_CTRL, data);
5526 }
5527
5528 #define MSG_ENTER_RLC_SAFE_MODE     1
5529 #define MSG_EXIT_RLC_SAFE_MODE      0
5530 #define RLC_GPR_REG2__REQ_MASK 0x00000001
5531 #define RLC_GPR_REG2__REQ__SHIFT 0
5532 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5533 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5534
5535 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev)
5536 {
5537         uint32_t rlc_setting;
5538
5539         rlc_setting = RREG32(mmRLC_CNTL);
5540         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
5541                 return false;
5542
5543         return true;
5544 }
5545
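/* Request RLC safe mode: write CMD with MESSAGE=1 to RLC_SAFE_MODE, poll
 * RLC_GPM_STAT until the GFX clock and power status bits are set, then wait
 * for the RLC to acknowledge by clearing the CMD bit.
 */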
5546 static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev)
5547 {
5548         uint32_t data;
5549         unsigned i;
5550         data = RREG32(mmRLC_CNTL);
5551         data |= RLC_SAFE_MODE__CMD_MASK;
5552         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5553         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5554         WREG32(mmRLC_SAFE_MODE, data);
5555
5556         /* wait for RLC_SAFE_MODE */
5557         for (i = 0; i < adev->usec_timeout; i++) {
5558                 if ((RREG32(mmRLC_GPM_STAT) &
5559                      (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5560                       RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5561                     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5562                      RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5563                         break;
5564                 udelay(1);
5565         }
5566         for (i = 0; i < adev->usec_timeout; i++) {
5567                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5568                         break;
5569                 udelay(1);
5570         }
5571 }
5572
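/* Leave RLC safe mode: write CMD with MESSAGE=0 and wait for the RLC to
 * acknowledge by clearing the CMD bit.
 */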
5573 static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev)
5574 {
5575         uint32_t data;
5576         unsigned i;
5577
5578         data = RREG32(mmRLC_CNTL);
5579         data |= RLC_SAFE_MODE__CMD_MASK;
5580         data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5581         WREG32(mmRLC_SAFE_MODE, data);
5582
5583         for (i = 0; i < adev->usec_timeout; i++) {
5584                 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5585                         break;
5586                 udelay(1);
5587         }
5588 }
5589
5590 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5591         .is_rlc_enabled = gfx_v8_0_is_rlc_enabled,
5592         .set_safe_mode = gfx_v8_0_set_safe_mode,
5593         .unset_safe_mode = gfx_v8_0_unset_safe_mode,
5594         .init = gfx_v8_0_rlc_init,
5595         .get_csb_size = gfx_v8_0_get_csb_size,
5596         .get_csb_buffer = gfx_v8_0_get_csb_buffer,
5597         .get_cp_table_num = gfx_v8_0_cp_jump_table_num,
5598         .resume = gfx_v8_0_rlc_resume,
5599         .stop = gfx_v8_0_rlc_stop,
5600         .reset = gfx_v8_0_rlc_reset,
5601         .start = gfx_v8_0_rlc_start
5602 };
5603
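/* Enable or disable medium grain clock gating (MGCG) along with RLC/CP
 * memory light sleep and CGTS tree shading, clearing or setting the MGCG
 * override through the SERDES command path and waiting for the SERDES
 * masters to go idle between steps.
 */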
5604 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5605                                                       bool enable)
5606 {
5607         uint32_t temp, data;
5608
5609         amdgpu_gfx_rlc_enter_safe_mode(adev);
5610
5611         /* It is disabled by HW by default */
5612         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5613                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5614                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5615                                 /* 1 - RLC memory Light sleep */
5616                                 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
5617
5618                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
5619                                 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
5620                 }
5621
5622                 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5623                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5624                 if (adev->flags & AMD_IS_APU)
5625                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5626                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5627                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5628                 else
5629                         data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5630                                   RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5631                                   RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5632                                   RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5633
5634                 if (temp != data)
5635                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5636
5637                 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5638                 gfx_v8_0_wait_for_rlc_serdes(adev);
5639
5640                 /* 5 - clear mgcg override */
5641                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5642
5643                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5644                         /* 6 - Enable CGTS (Tree Shade) MGCG/MGLS */
5645                         temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5646                         data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5647                         data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5648                         data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5649                         data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5650                         if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5651                             (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5652                                 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5653                         data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5654                         data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5655                         if (temp != data)
5656                                 WREG32(mmCGTS_SM_CTRL_REG, data);
5657                 }
5658                 udelay(50);
5659
5660                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5661                 gfx_v8_0_wait_for_rlc_serdes(adev);
5662         } else {
5663                 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5664                 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5665                 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5666                                 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5667                                 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5668                                 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5669                 if (temp != data)
5670                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5671
5672                 /* 2 - disable MGLS in RLC */
5673                 data = RREG32(mmRLC_MEM_SLP_CNTL);
5674                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5675                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5676                         WREG32(mmRLC_MEM_SLP_CNTL, data);
5677                 }
5678
5679                 /* 3 - disable MGLS in CP */
5680                 data = RREG32(mmCP_MEM_SLP_CNTL);
5681                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5682                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5683                         WREG32(mmCP_MEM_SLP_CNTL, data);
5684                 }
5685
5686                 /* 4 - Disable CGTS (Tree Shade) MGCG and MGLS */
5687                 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5688                 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5689                                 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5690                 if (temp != data)
5691                         WREG32(mmCGTS_SM_CTRL_REG, data);
5692
5693                 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5694                 gfx_v8_0_wait_for_rlc_serdes(adev);
5695
5696                 /* 6 - set mgcg override */
5697                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5698
5699                 udelay(50);
5700
5701                 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5702                 gfx_v8_0_wait_for_rlc_serdes(adev);
5703         }
5704
5705         amdgpu_gfx_rlc_exit_safe_mode(adev);
5706 }
5707
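/* Enable or disable coarse grain clock gating (CGCG) and, when supported,
 * coarse grain light sleep (CGLS), updating the RLC override bits and
 * RLC_CGCG_CGLS_CTRL enables via the SERDES command path, with the GUI idle
 * interrupt toggled around the sequence.
 */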
5708 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5709                                                       bool enable)
5710 {
5711         uint32_t temp, temp1, data, data1;
5712
5713         temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5714
5715         amdgpu_gfx_rlc_enter_safe_mode(adev);
5716
5717         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5718                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5719                 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5720                 if (temp1 != data1)
5721                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5722
5723                 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5724                 gfx_v8_0_wait_for_rlc_serdes(adev);
5725
5726                 /* 2 - clear cgcg override */
5727                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5728
5729                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5730                 gfx_v8_0_wait_for_rlc_serdes(adev);
5731
5732                 /* 3 - write cmd to set CGLS */
5733                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5734
5735                 /* 4 - enable cgcg */
5736                 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5737
5738                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5739                         /* enable cgls*/
5740                         data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5741
5742                         temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5743                         data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5744
5745                         if (temp1 != data1)
5746                                 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5747                 } else {
5748                         data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5749                 }
5750
5751                 if (temp != data)
5752                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5753
5754                 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
5755                  * Cmp_busy/GFX_Idle interrupts
5756                  */
5757                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5758         } else {
5759                 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5760                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5761
5762                 /* set CGCG/CGLS override to disable gating */
5763                 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5764                 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5765                                 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5766                 if (temp1 != data1)
5767                         WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5768
5769                 /* read gfx register to wake up cgcg */
5770                 RREG32(mmCB_CGTT_SCLK_CTRL);
5771                 RREG32(mmCB_CGTT_SCLK_CTRL);
5772                 RREG32(mmCB_CGTT_SCLK_CTRL);
5773                 RREG32(mmCB_CGTT_SCLK_CTRL);
5774
5775                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5776                 gfx_v8_0_wait_for_rlc_serdes(adev);
5777
5778                 /* write cmd to set CGCG override */
5779                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5780
5781                 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5782                 gfx_v8_0_wait_for_rlc_serdes(adev);
5783
5784                 /* write cmd to Clear CGLS */
5785                 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5786
5787                 /* disable cgcg, cgls should be disabled too. */
5788                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5789                           RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5790                 if (temp != data)
5791                         WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5792                 /* enable interrupts again for PG */
5793                 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5794         }
5795
5796         gfx_v8_0_wait_for_rlc_serdes(adev);
5797
5798         amdgpu_gfx_rlc_exit_safe_mode(adev);
5799 }
5800 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5801                                             bool enable)
5802 {
5803         if (enable) {
5804                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5805                  * ===  MGCG + MGLS + TS(CG/LS) ===
5806                  */
5807                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5808                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5809         } else {
5810                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5811                  * ===  CGCG + CGLS ===
5812                  */
5813                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5814                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5815         }
5816         return 0;
5817 }
5818
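/* On Tonga, clock gating is owned by the SMU: encode the requested state
 * for the GFX CG and MG blocks as PP_CG_MSG_ID messages and submit them
 * through amdgpu_dpm_set_clockgating_by_smu().
 */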
5819 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
5820                                           enum amd_clockgating_state state)
5821 {
5822         uint32_t msg_id, pp_state = 0;
5823         uint32_t pp_support_state = 0;
5824
5825         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5826                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5827                         pp_support_state = PP_STATE_SUPPORT_LS;
5828                         pp_state = PP_STATE_LS;
5829                 }
5830                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5831                         pp_support_state |= PP_STATE_SUPPORT_CG;
5832                         pp_state |= PP_STATE_CG;
5833                 }
5834                 if (state == AMD_CG_STATE_UNGATE)
5835                         pp_state = 0;
5836
5837                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5838                                 PP_BLOCK_GFX_CG,
5839                                 pp_support_state,
5840                                 pp_state);
5841                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5842                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5843         }
5844
5845         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5846                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5847                         pp_support_state = PP_STATE_SUPPORT_LS;
5848                         pp_state = PP_STATE_LS;
5849                 }
5850
5851                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5852                         pp_support_state |= PP_STATE_SUPPORT_CG;
5853                         pp_state |= PP_STATE_CG;
5854                 }
5855
5856                 if (state == AMD_CG_STATE_UNGATE)
5857                         pp_state = 0;
5858
5859                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5860                                 PP_BLOCK_GFX_MG,
5861                                 pp_support_state,
5862                                 pp_state);
5863                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5864                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5865         }
5866
5867         return 0;
5868 }
5869
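/* Polaris handles the 3D (CGCG/CGLS), RLC and CP light sleep blocks in
 * addition to the CG and MG blocks above; all requests are likewise routed
 * to the SMU as PP_CG_MSG_ID messages.
 */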
5870 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
5871                                           enum amd_clockgating_state state)
5872 {
5873
5874         uint32_t msg_id, pp_state = 0;
5875         uint32_t pp_support_state = 0;
5876
5877         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
5878                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5879                         pp_support_state = PP_STATE_SUPPORT_LS;
5880                         pp_state = PP_STATE_LS;
5881                 }
5882                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
5883                         pp_support_state |= PP_STATE_SUPPORT_CG;
5884                         pp_state |= PP_STATE_CG;
5885                 }
5886                 if (state == AMD_CG_STATE_UNGATE)
5887                         pp_state = 0;
5888
5889                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5890                                 PP_BLOCK_GFX_CG,
5891                                 pp_support_state,
5892                                 pp_state);
5893                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5894                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5895         }
5896
5897         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
5898                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
5899                         pp_support_state = PP_STATE_SUPPORT_LS;
5900                         pp_state = PP_STATE_LS;
5901                 }
5902                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
5903                         pp_support_state |= PP_STATE_SUPPORT_CG;
5904                         pp_state |= PP_STATE_CG;
5905                 }
5906                 if (state == AMD_CG_STATE_UNGATE)
5907                         pp_state = 0;
5908
5909                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5910                                 PP_BLOCK_GFX_3D,
5911                                 pp_support_state,
5912                                 pp_state);
5913                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5914                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5915         }
5916
5917         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
5918                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5919                         pp_support_state = PP_STATE_SUPPORT_LS;
5920                         pp_state = PP_STATE_LS;
5921                 }
5922
5923                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
5924                         pp_support_state |= PP_STATE_SUPPORT_CG;
5925                         pp_state |= PP_STATE_CG;
5926                 }
5927
5928                 if (state == AMD_CG_STATE_UNGATE)
5929                         pp_state = 0;
5930
5931                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5932                                 PP_BLOCK_GFX_MG,
5933                                 pp_support_state,
5934                                 pp_state);
5935                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5936                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5937         }
5938
5939         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5940                 pp_support_state = PP_STATE_SUPPORT_LS;
5941
5942                 if (state == AMD_CG_STATE_UNGATE)
5943                         pp_state = 0;
5944                 else
5945                         pp_state = PP_STATE_LS;
5946
5947                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5948                                 PP_BLOCK_GFX_RLC,
5949                                 pp_support_state,
5950                                 pp_state);
5951                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5952                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5953         }
5954
5955         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5956                 pp_support_state = PP_STATE_SUPPORT_LS;
5957
5958                 if (state == AMD_CG_STATE_UNGATE)
5959                         pp_state = 0;
5960                 else
5961                         pp_state = PP_STATE_LS;
5962                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
5963                         PP_BLOCK_GFX_CP,
5964                         pp_support_state,
5965                         pp_state);
5966                 if (adev->powerplay.pp_funcs->set_clockgating_by_smu)
5967                         amdgpu_dpm_set_clockgating_by_smu(adev, msg_id);
5968         }
5969
5970         return 0;
5971 }
5972
5973 static int gfx_v8_0_set_clockgating_state(void *handle,
5974                                           enum amd_clockgating_state state)
5975 {
5976         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5977
5978         if (amdgpu_sriov_vf(adev))
5979                 return 0;
5980
5981         switch (adev->asic_type) {
5982         case CHIP_FIJI:
5983         case CHIP_CARRIZO:
5984         case CHIP_STONEY:
5985                 gfx_v8_0_update_gfx_clock_gating(adev,
5986                                                  state == AMD_CG_STATE_GATE);
5987                 break;
5988         case CHIP_TONGA:
5989                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
5990                 break;
5991         case CHIP_POLARIS10:
5992         case CHIP_POLARIS11:
5993         case CHIP_POLARIS12:
5994         case CHIP_VEGAM:
5995                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
5996                 break;
5997         default:
5998                 break;
5999         }
6000         return 0;
6001 }
6002
6003 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6004 {
6005         return ring->adev->wb.wb[ring->rptr_offs];
6006 }
6007
6008 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6009 {
6010         struct amdgpu_device *adev = ring->adev;
6011
6012         if (ring->use_doorbell)
6013                 /* XXX check if swapping is necessary on BE */
6014                 return ring->adev->wb.wb[ring->wptr_offs];
6015         else
6016                 return RREG32(mmCP_RB0_WPTR);
6017 }
6018
6019 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6020 {
6021         struct amdgpu_device *adev = ring->adev;
6022
6023         if (ring->use_doorbell) {
6024                 /* XXX check if swapping is necessary on BE */
6025                 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6026                 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6027         } else {
6028                 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
6029                 (void)RREG32(mmCP_RB0_WPTR);
6030         }
6031 }
6032
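/* Flush the HDP cache from the ring: write GPU_HDP_FLUSH_REQ and wait for
 * the matching bit in GPU_HDP_FLUSH_DONE, using the per-pipe CP mask on
 * compute/KIQ rings and the CP0 mask (via the PFP) on the GFX ring.
 */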
6033 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6034 {
6035         u32 ref_and_mask, reg_mem_engine;
6036
6037         if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6038             (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6039                 switch (ring->me) {
6040                 case 1:
6041                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6042                         break;
6043                 case 2:
6044                         ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6045                         break;
6046                 default:
6047                         return;
6048                 }
6049                 reg_mem_engine = 0;
6050         } else {
6051                 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6052                 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6053         }
6054
6055         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6056         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6057                                  WAIT_REG_MEM_FUNCTION(3) |  /* == */
6058                                  reg_mem_engine));
6059         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6060         amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6061         amdgpu_ring_write(ring, ref_and_mask);
6062         amdgpu_ring_write(ring, ref_and_mask);
6063         amdgpu_ring_write(ring, 0x20); /* poll interval */
6064 }
6065
6066 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6067 {
6068         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6069         amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6070                 EVENT_INDEX(4));
6071
6072         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6073         amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6074                 EVENT_INDEX(0));
6075 }
6076
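/* Emit an indirect buffer on the GFX ring: CE IBs use
 * INDIRECT_BUFFER_CONST, and under SR-IOV preemptible IBs get the
 * preemption enable bit plus DE meta data for non-CE IBs.
 */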
6077 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6078                                         struct amdgpu_job *job,
6079                                         struct amdgpu_ib *ib,
6080                                         uint32_t flags)
6081 {
6082         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6083         u32 header, control = 0;
6084
6085         if (ib->flags & AMDGPU_IB_FLAG_CE)
6086                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6087         else
6088                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6089
6090         control |= ib->length_dw | (vmid << 24);
6091
6092         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
6093                 control |= INDIRECT_BUFFER_PRE_ENB(1);
6094
6095                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
6096                         gfx_v8_0_ring_emit_de_meta(ring);
6097         }
6098
6099         amdgpu_ring_write(ring, header);
6100         amdgpu_ring_write(ring,
6101 #ifdef __BIG_ENDIAN
6102                           (2 << 0) |
6103 #endif
6104                           (ib->gpu_addr & 0xFFFFFFFC));
6105         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6106         amdgpu_ring_write(ring, control);
6107 }
6108
6109 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6110                                           struct amdgpu_job *job,
6111                                           struct amdgpu_ib *ib,
6112                                           uint32_t flags)
6113 {
6114         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
6115         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
6116
6117         /* Currently, there is a high likelihood of a wave ID mismatch
6118          * between ME and GDS, leading to a HW deadlock, because ME generates
6119          * wave IDs different from those the GDS expects. This happens
6120          * randomly when at least 5 compute pipes use GDS ordered append.
6121          * The wave IDs generated by ME are also wrong after suspend/resume.
6122          * Those are probably bugs somewhere else in the kernel driver.
6123          *
6124          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
6125          * GDS to 0 for this ring (me/pipe).
6126          */
6127         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
6128                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
6129                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
6130                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
6131         }
6132
6133         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6134         amdgpu_ring_write(ring,
6135 #ifdef __BIG_ENDIAN
6136                                 (2 << 0) |
6137 #endif
6138                                 (ib->gpu_addr & 0xFFFFFFFC));
6139         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6140         amdgpu_ring_write(ring, control);
6141 }
6142
6143 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6144                                          u64 seq, unsigned flags)
6145 {
6146         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6147         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6148
6149         /* Workaround for cache flush problems. First send a dummy EOP
6150          * event down the pipe with seq one below.
6151          */
6152         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6153         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6154                                  EOP_TC_ACTION_EN |
6155                                  EOP_TC_WB_ACTION_EN |
6156                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6157                                  EVENT_INDEX(5)));
6158         amdgpu_ring_write(ring, addr & 0xfffffffc);
6159         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6160                                 DATA_SEL(1) | INT_SEL(0));
6161         amdgpu_ring_write(ring, lower_32_bits(seq - 1));
6162         amdgpu_ring_write(ring, upper_32_bits(seq - 1));
6163
6164         /* Then send the real EOP event down the pipe:
6165          * EVENT_WRITE_EOP - flush caches, send int */
6166         amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6167         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6168                                  EOP_TC_ACTION_EN |
6169                                  EOP_TC_WB_ACTION_EN |
6170                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6171                                  EVENT_INDEX(5)));
6172         amdgpu_ring_write(ring, addr & 0xfffffffc);
6173         amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6174                           DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6175         amdgpu_ring_write(ring, lower_32_bits(seq));
6176         amdgpu_ring_write(ring, upper_32_bits(seq));
6177
6178 }
6179
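/* Make the ring wait until its fence memory reports the last synced
 * sequence number, using the PFP on the GFX ring and the ME on compute
 * rings.
 */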
6180 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6181 {
6182         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6183         uint32_t seq = ring->fence_drv.sync_seq;
6184         uint64_t addr = ring->fence_drv.gpu_addr;
6185
6186         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6187         amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6188                                  WAIT_REG_MEM_FUNCTION(3) | /* equal */
6189                                  WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6190         amdgpu_ring_write(ring, addr & 0xfffffffc);
6191         amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6192         amdgpu_ring_write(ring, seq);
6193         amdgpu_ring_write(ring, 0xffffffff);
6194         amdgpu_ring_write(ring, 4); /* poll interval */
6195 }
6196
6197 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6198                                         unsigned vmid, uint64_t pd_addr)
6199 {
6200         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6201
6202         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
6203
6204         /* wait for the invalidate to complete */
6205         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6206         amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6207                                  WAIT_REG_MEM_FUNCTION(0) |  /* always */
6208                                  WAIT_REG_MEM_ENGINE(0))); /* me */
6209         amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6210         amdgpu_ring_write(ring, 0);
6211         amdgpu_ring_write(ring, 0); /* ref */
6212         amdgpu_ring_write(ring, 0); /* mask */
6213         amdgpu_ring_write(ring, 0x20); /* poll interval */
6214
6215         /* compute doesn't have PFP */
6216         if (usepfp) {
6217                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6218                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6219                 amdgpu_ring_write(ring, 0x0);
6220         }
6221 }
6222
6223 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6224 {
6225         return ring->adev->wb.wb[ring->wptr_offs];
6226 }
6227
6228 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6229 {
6230         struct amdgpu_device *adev = ring->adev;
6231
6232         /* XXX check if swapping is necessary on BE */
6233         adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
6234         WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
6235 }
6236
6237 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
6238                                            bool acquire)
6239 {
6240         struct amdgpu_device *adev = ring->adev;
6241         int pipe_num, tmp, reg;
6242         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
6243
6244         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
6245
6246         /* first me only has 2 entries, GFX and HP3D */
6247         if (ring->me > 0)
6248                 pipe_num -= 2;
6249
6250         reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
6251         tmp = RREG32(reg);
6252         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
6253         WREG32(reg, tmp);
6254 }
6255
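/* Track per-pipe reservations in pipe_reserve_bitmap under
 * pipe_reserve_mutex: when nothing is reserved every ring regains full SPI
 * compute resources, otherwise pipes without a reservation are throttled.
 */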
6256 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
6257                                             struct amdgpu_ring *ring,
6258                                             bool acquire)
6259 {
6260         int i, pipe;
6261         bool reserve;
6262         struct amdgpu_ring *iring;
6263
6264         mutex_lock(&adev->gfx.pipe_reserve_mutex);
6265         pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
6266         if (acquire)
6267                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6268         else
6269                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6270
6271         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
6272                 /* Clear all reservations - everyone reacquires all resources */
6273                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
6274                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
6275                                                        true);
6276
6277                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
6278                         gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
6279                                                        true);
6280         } else {
6281                 /* Lower all pipes without a current reservation */
6282                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
6283                         iring = &adev->gfx.gfx_ring[i];
6284                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
6285                                                            iring->me,
6286                                                            iring->pipe,
6287                                                            0);
6288                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6289                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6290                 }
6291
6292                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
6293                         iring = &adev->gfx.compute_ring[i];
6294                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
6295                                                            iring->me,
6296                                                            iring->pipe,
6297                                                            0);
6298                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
6299                         gfx_v8_0_ring_set_pipe_percent(iring, reserve);
6300                 }
6301         }
6302
6303         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
6304 }
6305
6306 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
6307                                       struct amdgpu_ring *ring,
6308                                       bool acquire)
6309 {
6310         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
6311         uint32_t queue_priority = acquire ? 0xf : 0x0;
6312
6313         mutex_lock(&adev->srbm_mutex);
6314         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
6315
6316         WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
6317         WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
6318
6319         vi_srbm_select(adev, 0, 0, 0, 0);
6320         mutex_unlock(&adev->srbm_mutex);
6321 }
6322 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
6323                                                enum drm_sched_priority priority)
6324 {
6325         struct amdgpu_device *adev = ring->adev;
6326         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
6327
6328         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
6329                 return;
6330
6331         gfx_v8_0_hqd_set_priority(adev, ring, acquire);
6332         gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
6333 }
6334
6335 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6336                                              u64 addr, u64 seq,
6337                                              unsigned flags)
6338 {
6339         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6340         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6341
6342         /* RELEASE_MEM - flush caches, send int */
6343         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6344         amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6345                                  EOP_TC_ACTION_EN |
6346                                  EOP_TC_WB_ACTION_EN |
6347                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6348                                  EVENT_INDEX(5)));
6349         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6350         amdgpu_ring_write(ring, addr & 0xfffffffc);
6351         amdgpu_ring_write(ring, upper_32_bits(addr));
6352         amdgpu_ring_write(ring, lower_32_bits(seq));
6353         amdgpu_ring_write(ring, upper_32_bits(seq));
6354 }
6355
6356 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6357                                          u64 seq, unsigned int flags)
6358 {
6359         /* we only allocate 32 bits for each seq wb address */
6360         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
6361
6362         /* write fence seq to the "addr" */
6363         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6364         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6365                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6366         amdgpu_ring_write(ring, lower_32_bits(addr));
6367         amdgpu_ring_write(ring, upper_32_bits(addr));
6368         amdgpu_ring_write(ring, lower_32_bits(seq));
6369
6370         if (flags & AMDGPU_FENCE_FLAG_INT) {
6371                 /* set register to trigger INT */
6372                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6373                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6374                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6375                 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6376                 amdgpu_ring_write(ring, 0);
6377                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
6378         }
6379 }
6380
6381 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6382 {
6383         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6384         amdgpu_ring_write(ring, 0);
6385 }
6386
6387 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6388 {
6389         uint32_t dw2 = 0;
6390
6391         if (amdgpu_sriov_vf(ring->adev))
6392                 gfx_v8_0_ring_emit_ce_meta(ring);
6393
6394         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6395         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6396                 gfx_v8_0_ring_emit_vgt_flush(ring);
6397                 /* set load_global_config & load_global_uconfig */
6398                 dw2 |= 0x8001;
6399                 /* set load_cs_sh_regs */
6400                 dw2 |= 0x01000000;
6401                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6402                 dw2 |= 0x10002;
6403
6404                 /* set load_ce_ram if a preamble is present */
6405                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6406                         dw2 |= 0x10000000;
6407         } else {
6408                 /* still load_ce_ram if this is the first time a preamble
6409                  * is presented, even though no context switch happens.
6410                  */
6411                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6412                         dw2 |= 0x10000000;
6413         }
6414
6415         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6416         amdgpu_ring_write(ring, dw2);
6417         amdgpu_ring_write(ring, 0);
6418 }
6419
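/* Emit a COND_EXEC packet with a dummy DW count and return its ring offset
 * so gfx_v8_0_ring_emit_patch_cond_exec() can later patch in the number of
 * DWs to skip when *cond_exe_gpu_addr reads zero.
 */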
6420 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
6421 {
6422         unsigned ret;
6423
6424         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
6425         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
6426         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
6427         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
6428         ret = ring->wptr & ring->buf_mask;
6429         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
6430         return ret;
6431 }
6432
6433 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
6434 {
6435         unsigned cur;
6436
6437         BUG_ON(offset > ring->buf_mask);
6438         BUG_ON(ring->ring[offset] != 0x55aa55aa);
6439
6440         cur = (ring->wptr & ring->buf_mask) - 1;
6441         if (likely(cur > offset))
6442                 ring->ring[offset] = cur - offset;
6443         else
6444                 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
6445 }
6446
6447 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
6448 {
6449         struct amdgpu_device *adev = ring->adev;
6450
6451         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
6452         amdgpu_ring_write(ring, 0 |     /* src: register */
6453                                 (5 << 8) |      /* dst: memory */
6454                                 (1 << 20));     /* write confirm */
6455         amdgpu_ring_write(ring, reg);
6456         amdgpu_ring_write(ring, 0);
6457         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
6458                                 adev->virt.reg_val_offs * 4));
6459         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
6460                                 adev->virt.reg_val_offs * 4));
6461 }
6462
6463 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
6464                                   uint32_t val)
6465 {
6466         uint32_t cmd;
6467
6468         switch (ring->funcs->type) {
6469         case AMDGPU_RING_TYPE_GFX:
6470                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
6471                 break;
6472         case AMDGPU_RING_TYPE_KIQ:
6473                 cmd = 1 << 16; /* no inc addr */
6474                 break;
6475         default:
6476                 cmd = WR_CONFIRM;
6477                 break;
6478         }
6479
6480         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6481         amdgpu_ring_write(ring, cmd);
6482         amdgpu_ring_write(ring, reg);
6483         amdgpu_ring_write(ring, 0);
6484         amdgpu_ring_write(ring, val);
6485 }
6486
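/* Attempt soft recovery of a hung ring by issuing an SQ_CMD that kills the
 * waves belonging to the offending VMID.
 */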
6487 static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
6488 {
6489         struct amdgpu_device *adev = ring->adev;
6490         uint32_t value = 0;
6491
6492         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
6493         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
6494         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
6495         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
6496         WREG32(mmSQ_CMD, value);
6497 }
6498
6499 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6500                                                  enum amdgpu_interrupt_state state)
6501 {
6502         WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6503                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6504 }
6505
6506 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6507                                                      int me, int pipe,
6508                                                      enum amdgpu_interrupt_state state)
6509 {
6510         u32 mec_int_cntl, mec_int_cntl_reg;
6511
6512         /*
6513          * amdgpu controls only the first MEC. That's why this function only
6514          * handles the setting of interrupts for this specific MEC. All other
6515          * pipes' interrupts are set by amdkfd.
6516          */
6517
6518         if (me == 1) {
6519                 switch (pipe) {
6520                 case 0:
6521                         mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6522                         break;
6523                 case 1:
6524                         mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
6525                         break;
6526                 case 2:
6527                         mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
6528                         break;
6529                 case 3:
6530                         mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
6531                         break;
6532                 default:
6533                         DRM_DEBUG("invalid pipe %d\n", pipe);
6534                         return;
6535                 }
6536         } else {
6537                 DRM_DEBUG("invalid me %d\n", me);
6538                 return;
6539         }
6540
6541         switch (state) {
6542         case AMDGPU_IRQ_STATE_DISABLE:
6543                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6544                 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6545                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6546                 break;
6547         case AMDGPU_IRQ_STATE_ENABLE:
6548                 mec_int_cntl = RREG32(mec_int_cntl_reg);
6549                 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
6550                 WREG32(mec_int_cntl_reg, mec_int_cntl);
6551                 break;
6552         default:
6553                 break;
6554         }
6555 }
6556
6557 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6558                                              struct amdgpu_irq_src *source,
6559                                              unsigned type,
6560                                              enum amdgpu_interrupt_state state)
6561 {
6562         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6563                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6564
6565         return 0;
6566 }
6567
6568 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6569                                               struct amdgpu_irq_src *source,
6570                                               unsigned type,
6571                                               enum amdgpu_interrupt_state state)
6572 {
6573         WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6574                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6575
6576         return 0;
6577 }
6578
6579 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6580                                             struct amdgpu_irq_src *src,
6581                                             unsigned type,
6582                                             enum amdgpu_interrupt_state state)
6583 {
6584         switch (type) {
6585         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6586                 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6587                 break;
6588         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6589                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6590                 break;
6591         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6592                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6593                 break;
6594         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6595                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6596                 break;
6597         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6598                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6599                 break;
6600         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6601                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6602                 break;
6603         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6604                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6605                 break;
6606         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6607                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6608                 break;
6609         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6610                 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6611                 break;
6612         default:
6613                 break;
6614         }
6615         return 0;
6616 }
6617
6618 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
6619                                          struct amdgpu_irq_src *source,
6620                                          unsigned int type,
6621                                          enum amdgpu_interrupt_state state)
6622 {
6623         int enable_flag;
6624
6625         switch (state) {
6626         case AMDGPU_IRQ_STATE_DISABLE:
6627                 enable_flag = 0;
6628                 break;
6629
6630         case AMDGPU_IRQ_STATE_ENABLE:
6631                 enable_flag = 1;
6632                 break;
6633
6634         default:
6635                 return -EINVAL;
6636         }
6637
6638         WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6639         WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6640         WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6641         WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6642         WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
6643         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6644                      enable_flag);
6645         WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6646                      enable_flag);
6647         WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6648                      enable_flag);
6649         WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6650                      enable_flag);
6651         WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6652                      enable_flag);
6653         WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6654                      enable_flag);
6655         WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6656                      enable_flag);
6657         WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
6658                      enable_flag);
6659
6660         return 0;
6661 }
6662
static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     unsigned int type,
				     enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 1;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 0;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL, enable_flag);

	return 0;
}

static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
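	/*
	 * entry->ring_id packs the interrupt source:
	 * bits [1:0] = pipe, bits [3:2] = micro engine (ME),
	 * bits [6:4] = queue.
	 */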
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/*
			 * Per-queue EOP interrupts are supported on the MEC
			 * starting with VI, but they can only be enabled or
			 * disabled per pipe, not per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static void gfx_v8_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i;

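	/* same ring_id bit layout as in gfx_v8_0_eop_irq() */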
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if (ring->me == me_id && ring->pipe == pipe_id &&
			    ring->queue == queue_id)
				drm_sched_fault(&ring->sched);
		}
		break;
	}
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}

static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}

static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
{
	u32 enc, se_id, sh_id, cu_id;
	char type[20];
	int sq_edc_source = -1;

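	/*
	 * ENCODING selects the SQ interrupt word format: 0 = auto/general
	 * purpose, 1 = wave instruction interrupt, 2 = wave EDC/ECC error.
	 */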
	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);

	switch (enc) {
	case 0:
		DRM_INFO("SQ general purpose intr detected: "
			 "se_id %d, immed_overflow %d, host_reg_overflow %d, "
			 "host_cmd_overflow %d, cmd_timestamp %d, "
			 "reg_timestamp %d, thread_trace_buff_full %d, "
			 "wlt %d, thread_trace %d.\n",
			 se_id,
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE));
		break;
	case 1:
	case 2:
		cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
		sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);

		/*
		 * This function can be called either directly from the ISR
		 * or from the deferred work handler; only in the latter
		 * (task) context may we take the mutex and read the
		 * SQ_EDC_INFO register instance.
		 */
		if (in_task()) {
			mutex_lock(&adev->grbm_idx_mutex);
			gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);

			sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);

			gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
			mutex_unlock(&adev->grbm_idx_mutex);
		}

		if (enc == 1)
			snprintf(type, sizeof(type), "instruction intr");
		else
			snprintf(type, sizeof(type), "EDC/ECC error");

		DRM_INFO("SQ %s detected: "
			 "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d, "
			 "trap %s, sq_edc_info.source %s.\n",
			 type, se_id, sh_id, cu_id,
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
			 (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable");
		break;
	default:
		DRM_ERROR("SQ invalid encoding type.\n");
	}
}

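/*
 * Deferred (task context) handler for SQ interrupts: from here
 * gfx_v8_0_parse_sq_irq() can take the GRBM index mutex and read
 * SQ_EDC_INFO. Both container_of() lookups resolve from the same
 * work_struct embedded in adev->gfx.sq_work.
 */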
static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
{
	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
	struct sq_work *sq_work = container_of(work, struct sq_work, work);

	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
}

static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
			   struct amdgpu_irq_src *source,
			   struct amdgpu_iv_entry *entry)
{
	unsigned ih_data = entry->src_data[0];

	/*
	 * Try to submit work so SQ_EDC_INFO can be accessed from the
	 * bottom half. If the previous work submission hasn't finished
	 * yet, just print whatever info is available directly from the
	 * ISR.
	 */
	if (work_pending(&adev->gfx.sq_work.work)) {
		gfx_v8_0_parse_sq_irq(adev, ih_data);
	} else {
		adev->gfx.sq_work.ih_data = ih_data;
		schedule_work(&adev->gfx.sq_work.work);
	}

	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum of 215 dwords if 16 IBs are counted in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		12 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER:
		       the first COND_EXEC jumps to the place just
		       prior to this double SWITCH_BUFFER */
		5 + /* COND_EXEC */
		7 + /* HDP_flush */
		4 + /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		12 + 12 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v8_0_ring_set_priority_compute,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
}

static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

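/*
 * A CU is active unless it is marked inactive either by the fuse
 * register (CC_GC_SHADER_ARRAY_CONFIG) or by the user override
 * (GC_USER_SHADER_ARRAY_CONFIG): OR the two inactive masks, invert,
 * and clip to the number of CUs per shader array.
 */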
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
	       RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}

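/*
 * Walk every shader engine / shader array, record each array's
 * active-CU bitmap, and pick the first ao_cu_num active CUs per array
 * as the "always on" (AO) set.
 */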
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

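	/* disable_masks[] is indexed [se * 2 + sh] for up to 4 SEs x 2 SHs */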
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

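	/*
	 * The WRITE_DATA body is one control dword, two address dwords and
	 * the payload, so "+ 4 - 2" works out to payload dwords + 2: the
	 * PACKET3 count field holds the body size in dwords minus one.
	 */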
	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
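	/* engine_sel 2 routes the write through the constant engine (CE) */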
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}

static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

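	/* the GDS backup area lives one 4 KiB page past the start of the CSA */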
	csa_addr = amdgpu_csa_vaddr(ring->adev);
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
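	/* engine_sel 1 routes the write through the prefetch parser (PFP) */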
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}