Merge tag 'v5.3-rc3' into drm-next-5.4
[sfrench/cifs-2.6.git] / drivers / gpu / drm / amd / amdgpu / gfx_v9_0.c
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 #include "vega10_enum.h"
40 #include "hdp/hdp_4_0_offset.h"
41
42 #include "soc15.h"
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48
49 #include "amdgpu_ras.h"
50
/* GFX v9 engine layout and ucode-load constants. */
51 #define GFX9_NUM_GFX_RINGS     1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
55
/*
 * PWR_MISC_CNTL_STATUS is not exported by the imported register headers,
 * so the offset and field definitions are kept locally here.
 * NOTE(review): presumably used by the RLC CGPG / GFXOFF status code later
 * in this file (usage not visible in this chunk) -- confirm the values
 * against the register spec before reusing elsewhere.
 */
56 #define mmPWR_MISC_CNTL_STATUS                                  0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L
/*
 * Firmware images this IP block may request, one set per ASIC:
 * CE/PFP/ME (graphics front-end), MEC/MEC2 (compute), and RLC.
 * MODULE_FIRMWARE() records the names in module metadata so userspace
 * tooling (e.g. initramfs generators) can bundle them.
 */
63 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
64 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
65 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
66 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
69
70 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
71 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
72 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
73 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
76
77 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
78 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
79 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
80 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
83
84 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
85 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
86 MODULE_FIRMWARE("amdgpu/raven_me.bin");
87 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
88 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
89 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
90
91 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
93 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
94 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
98
99 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
100 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
101 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
102 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
105 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
106
/* Arcturus is compute-only: no CE/PFP/ME graphics front-end firmware. */
107 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
108 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
109 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
110
/*
 * Arcturus-specific TCP_CHAN_STEER register offsets, defined locally
 * because they are not present in the generic gc_9_0 register headers.
 */
111 #define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
112 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
113 #define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
114 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
115 #define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
116 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
117 #define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
118 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
119 #define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
120 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
121 #define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
122 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0
123
/*
 * GFX RAS sub-block indices as understood by the PSP TA (Trusted
 * Application) firmware.  NOTE(review): this enum appears to mirror a
 * firmware-side definition -- the numeric values and the explicit
 * _INDEX_START/_INDEX_END aliases form an ABI, so entries must not be
 * reordered or removed independently of the TA firmware.  Units with
 * multiple hardware instances (SQC, TCC, EA) are split into numbered
 * sub-ranges.
 */
124 enum ta_ras_gfx_subblock {
125         /*CPC*/
126         TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
127         TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
128         TA_RAS_BLOCK__GFX_CPC_UCODE,
129         TA_RAS_BLOCK__GFX_DC_STATE_ME1,
130         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
131         TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
132         TA_RAS_BLOCK__GFX_DC_STATE_ME2,
133         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
134         TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
135         TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
136         /* CPF*/
137         TA_RAS_BLOCK__GFX_CPF_INDEX_START,
138         TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
139         TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
140         TA_RAS_BLOCK__GFX_CPF_TAG,
141         TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
142         /* CPG*/
143         TA_RAS_BLOCK__GFX_CPG_INDEX_START,
144         TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
145         TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
146         TA_RAS_BLOCK__GFX_CPG_TAG,
147         TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
148         /* GDS*/
149         TA_RAS_BLOCK__GFX_GDS_INDEX_START,
150         TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
151         TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
152         TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
153         TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
154         TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
155         TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
156         /* SPI*/
157         TA_RAS_BLOCK__GFX_SPI_SR_MEM,
158         /* SQ*/
159         TA_RAS_BLOCK__GFX_SQ_INDEX_START,
160         TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
161         TA_RAS_BLOCK__GFX_SQ_LDS_D,
162         TA_RAS_BLOCK__GFX_SQ_LDS_I,
163         TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
164         TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
165         /* SQC (3 ranges)*/
166         TA_RAS_BLOCK__GFX_SQC_INDEX_START,
167         /* SQC range 0*/
168         TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
169         TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
170                 TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
171         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
172         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
173         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
174         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
175         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
176         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
177         TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
178                 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
179         /* SQC range 1*/
180         TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
181         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
182                 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
183         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
184         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
185         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
186         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
187         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
188         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
189         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
190         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
191         TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
192                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
193         /* SQC range 2*/
194         TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
195         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
196                 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
197         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
198         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
199         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
200         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
201         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
202         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
203         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
204         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
205         TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
206                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
207         TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
208         /* TA*/
209         TA_RAS_BLOCK__GFX_TA_INDEX_START,
210         TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
211         TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
212         TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
213         TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
214         TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
215         TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
216         /* TCA*/
217         TA_RAS_BLOCK__GFX_TCA_INDEX_START,
218         TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
219         TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
220         TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
221         /* TCC (5 sub-ranges)*/
222         TA_RAS_BLOCK__GFX_TCC_INDEX_START,
223         /* TCC range 0*/
224         TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
225         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
226         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
227         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
228         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
229         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
230         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
231         TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
232         TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
233         TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
234         /* TCC range 1*/
235         TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
236         TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
237         TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
238         TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
239                 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
240         /* TCC range 2*/
241         TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
242         TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
243         TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
244         TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
245         TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
246         TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
247         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
248         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
249         TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
250         TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
251                 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
252         /* TCC range 3*/
253         TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
254         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
255         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
256         TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
257                 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
258         /* TCC range 4*/
259         TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
260         TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
261                 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
262         TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
263         TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
264                 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
265         TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
266         /* TCI*/
267         TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
268         /* TCP*/
269         TA_RAS_BLOCK__GFX_TCP_INDEX_START,
270         TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
271         TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
272         TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
273         TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
274         TA_RAS_BLOCK__GFX_TCP_DB_RAM,
275         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
276         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
277         TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
278         /* TD*/
279         TA_RAS_BLOCK__GFX_TD_INDEX_START,
280         TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
281         TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
282         TA_RAS_BLOCK__GFX_TD_CS_FIFO,
283         TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
284         /* EA (3 sub-ranges)*/
285         TA_RAS_BLOCK__GFX_EA_INDEX_START,
286         /* EA range 0*/
287         TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
288         TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
289         TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
290         TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
291         TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
292         TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
293         TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
294         TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
295         TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
296         TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
297         /* EA range 1*/
298         TA_RAS_BLOCK__GFX_EA_INDEX1_START,
299         TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
300         TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
301         TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
302         TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
303         TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
304         TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
305         TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
306         TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
307         /* EA range 2*/
308         TA_RAS_BLOCK__GFX_EA_INDEX2_START,
309         TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
310         TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
311         TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
312         TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
313         TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
314         TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
315         /* UTC VM L2 bank*/
316         TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
317         /* UTC VM walker*/
318         TA_RAS_BLOCK__UTC_VML2_WALKER,
319         /* UTC ATC L2 2MB cache*/
320         TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
321         /* UTC ATC L2 4KB cache*/
322         TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
323         TA_RAS_BLOCK__GFX_MAX
324 };
325
/*
 * Driver-side descriptor for one GFX RAS sub-block: maps the AMDGPU RAS
 * sub-block name/index to the TA firmware index and records which error
 * types hardware and software support for it.  Instances are built by the
 * AMDGPU_RAS_SUB_BLOCK() initializer macro, which defines the bit layout
 * of the two error-type masks.
 */
326 struct ras_gfx_subblock {
327         unsigned char *name;
328         int ta_subblock;
329         int hw_supported_error_type;
330         int sw_supported_error_type;
331 };
332
/*
 * Designated initializer for one ras_gfx_subblocks[] entry, indexed by the
 * AMDGPU_RAS_BLOCK__ enum and carrying the matching TA_RAS_BLOCK__ index.
 * The flags a..d pack into hw_supported_error_type (bit0=a, bit1=b,
 * bit2=c, bit3=d) and e..h into sw_supported_error_type (bit0=g, bit1=e,
 * bit2=h, bit3=f).  NOTE(review): the non-sequential sw packing order is
 * as written upstream; confirm the bit meanings against the RAS error-type
 * flags in amdgpu_ras.h before extending.
 */
333 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
334         [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
335                 #subblock,                                                     \
336                 TA_RAS_BLOCK__##subblock,                                      \
337                 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
338                 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
339         }
340
/*
 * Capability table for every GFX RAS sub-block.  The eight flag arguments
 * are the a..h inputs to AMDGPU_RAS_SUB_BLOCK() (see that macro for the
 * bit packing).  This is pure data describing what each hardware unit
 * supports; entries are hardware-defined and should only change in step
 * with the TA firmware / RAS spec.
 */
341 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
342         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
343         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
344         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
345         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
346         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
347         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
348         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
349         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
350         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
351         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
352         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
353         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
354         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
355         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
356         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
357         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
358         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
359                              0),
360         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
361                              0),
362         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
363         AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
364         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
365         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
366         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
367         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
368         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
369         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
370                              0, 0),
371         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
372                              0),
373         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
374                              0, 0),
375         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
376                              0),
377         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
378                              0, 0),
379         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
380                              0),
381         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
382                              1),
383         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
384                              0, 0, 0),
385         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
386                              0),
387         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
388                              0),
389         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
390                              0),
391         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
392                              0),
393         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
394                              0),
395         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
396                              0, 0),
397         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
398                              0),
399         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
400                              0),
401         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
402                              0, 0, 0),
403         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
404                              0),
405         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
406                              0),
407         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
408                              0),
409         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
410                              0),
411         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
412                              0),
413         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
414                              0, 0),
415         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
416                              0),
417         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
418         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
419         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
420         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
421         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
422         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
423         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
424         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
425         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
426                              1),
427         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
428                              1),
429         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
430                              1),
431         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
432                              0),
433         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
434                              0),
435         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
436         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
437         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
438         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
439         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
440         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
441         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
442         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
443         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
444         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
445         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
446         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
447                              0),
448         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
449         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
450                              0),
451         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
452                              0, 0),
453         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
454                              0),
455         AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
456         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
457         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
458         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
459         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
460         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
461         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
462         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
463         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
464         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
465         AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
466         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
467         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
468         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
469         AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
470         AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
471         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
472         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
473         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
474         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
475         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
476         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
477         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
478         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
479         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
480         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
481         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
482         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
483         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
484         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
485         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
486         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
487         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
488         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
489 };
490
/*
 * Golden register settings common to the GC 9.0 family.  Each entry is
 * SOC15_REG_GOLDEN_VALUE(ip, instance, register, mask, value); the bits
 * selected by the mask are reprogrammed to the given value during hw init.
 * NOTE(review): application presumably happens via
 * soc15_program_register_sequence() in code outside this chunk -- confirm.
 * Values are hardware-tuning data from AMD; do not edit by hand.
 */
491 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
492 {
493         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
494         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
495         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
496         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
497         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
498         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
499         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
500         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
501         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
502         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
503         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
504         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
505         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
506         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
507         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
508         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
509         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
510         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
511         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
512         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
513 };
514
/*
 * Vega10-specific golden register overrides, applied in addition to the
 * common golden_settings_gc_9_0 table above.  Same entry format:
 * (ip, instance, register, mask, value).  Hardware-tuning data from AMD;
 * do not edit by hand.
 */
515 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
516 {
517         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
518         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
519         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
520         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
521         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
522         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
523         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
524         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
525         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
526         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
527         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
528         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
529         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
530         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
531         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
532         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
533         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
534         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
535 };
536
/*
 * Vega20-specific golden register overrides (entry format:
 * ip, instance, register, mask, value).  Hardware-tuning data from AMD;
 * do not edit by hand.
 */
537 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
538 {
539         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
540         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
541         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
542         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
543         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
544         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
545         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
546         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
547         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
548         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
549         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
550 };
551
/*
 * Golden register settings common to GC 9.1 (Raven-family APUs); a
 * per-revision table is applied on top of this one.  Entry format:
 * (ip, instance, register, mask, value).  Hardware-tuning data from AMD;
 * do not edit by hand.
 */
552 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
553 {
554         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
555         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
556         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
557         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
558         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
559         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
560         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
561         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
562         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
563         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
564         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
565         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
566         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
567         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
568         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
569         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
570         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
571         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
572         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
573         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
574         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
575         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
576         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
577         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
578 };
579
/* Raven1 (rev_id < 8) chip-specific overrides, applied after golden_settings_gc_9_1. */
static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};
590
/* Raven2 (rev_id >= 8) chip-specific overrides, applied after golden_settings_gc_9_1. */
static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};
613
/*
 * Settings common to every gfx9 ASIC except Arcturus, programmed last
 * (after the per-family and per-chip sequences).
 */
static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};
620
/* Family-wide golden settings for Vega12 (gfx 9.2.1), programmed before the vg12 overrides. */
static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};
640
/* Vega12 chip-specific overrides, applied after golden_settings_gc_9_2_1. */
static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};
657
/*
 * Arcturus golden settings. Note: Arcturus gets only this sequence —
 * neither a family sequence nor golden_settings_gc_9_x_common is applied
 * (see gfx_v9_0_init_golden_registers).
 */
static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
};
669
/*
 * Offsets of RLC_SRM_INDEX_CNTL_ADDR_0..7 relative to entry 0, so that
 * entry N can be addressed as mmRLC_SRM_INDEX_CNTL_ADDR_0 + OFFSETS[N]
 * (the registers are not guaranteed to be contiguous).
 */
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};
681
/* Companion table: offsets of RLC_SRM_INDEX_CNTL_DATA_0..7 relative to entry 0. */
static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};
693
/*
 * Expected GB_ADDR_CONFIG values per ASIC. These mirror the golden table
 * entries above (e.g. 0x24104041 for Vega12, 0x24000042/0x26013041 for
 * Raven1/Raven2); presumably used later to sanity-check the programmed
 * address config — confirm at the use sites (outside this chunk).
 */
#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
698
699 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
700 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
701 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
702 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
703 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
704                                  struct amdgpu_cu_info *cu_info);
705 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
706 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
707 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
708 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
709 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
710                                           void *ras_error_status);
711 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
712                                      void *inject_if);
713
714 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
715 {
716         switch (adev->asic_type) {
717         case CHIP_VEGA10:
718                 soc15_program_register_sequence(adev,
719                                                 golden_settings_gc_9_0,
720                                                 ARRAY_SIZE(golden_settings_gc_9_0));
721                 soc15_program_register_sequence(adev,
722                                                 golden_settings_gc_9_0_vg10,
723                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
724                 break;
725         case CHIP_VEGA12:
726                 soc15_program_register_sequence(adev,
727                                                 golden_settings_gc_9_2_1,
728                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
729                 soc15_program_register_sequence(adev,
730                                                 golden_settings_gc_9_2_1_vg12,
731                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
732                 break;
733         case CHIP_VEGA20:
734                 soc15_program_register_sequence(adev,
735                                                 golden_settings_gc_9_0,
736                                                 ARRAY_SIZE(golden_settings_gc_9_0));
737                 soc15_program_register_sequence(adev,
738                                                 golden_settings_gc_9_0_vg20,
739                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
740                 break;
741         case CHIP_ARCTURUS:
742                 soc15_program_register_sequence(adev,
743                                                 golden_settings_gc_9_4_1_arct,
744                                                 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
745                 break;
746         case CHIP_RAVEN:
747                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
748                                                 ARRAY_SIZE(golden_settings_gc_9_1));
749                 if (adev->rev_id >= 8)
750                         soc15_program_register_sequence(adev,
751                                                         golden_settings_gc_9_1_rv2,
752                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
753                 else
754                         soc15_program_register_sequence(adev,
755                                                         golden_settings_gc_9_1_rv1,
756                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
757                 break;
758         default:
759                 break;
760         }
761
762         if (adev->asic_type != CHIP_ARCTURUS)
763                 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
764                                                 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
765 }
766
767 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
768 {
769         adev->gfx.scratch.num_reg = 8;
770         adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
771         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
772 }
773
774 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
775                                        bool wc, uint32_t reg, uint32_t val)
776 {
777         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
778         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
779                                 WRITE_DATA_DST_SEL(0) |
780                                 (wc ? WR_CONFIRM : 0));
781         amdgpu_ring_write(ring, reg);
782         amdgpu_ring_write(ring, 0);
783         amdgpu_ring_write(ring, val);
784 }
785
786 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
787                                   int mem_space, int opt, uint32_t addr0,
788                                   uint32_t addr1, uint32_t ref, uint32_t mask,
789                                   uint32_t inv)
790 {
791         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
792         amdgpu_ring_write(ring,
793                                  /* memory (1) or register (0) */
794                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
795                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
796                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
797                                  WAIT_REG_MEM_ENGINE(eng_sel)));
798
799         if (mem_space)
800                 BUG_ON(addr0 & 0x3); /* Dword align */
801         amdgpu_ring_write(ring, addr0);
802         amdgpu_ring_write(ring, addr1);
803         amdgpu_ring_write(ring, ref);
804         amdgpu_ring_write(ring, mask);
805         amdgpu_ring_write(ring, inv); /* poll interval */
806 }
807
808 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
809 {
810         struct amdgpu_device *adev = ring->adev;
811         uint32_t scratch;
812         uint32_t tmp = 0;
813         unsigned i;
814         int r;
815
816         r = amdgpu_gfx_scratch_get(adev, &scratch);
817         if (r)
818                 return r;
819
820         WREG32(scratch, 0xCAFEDEAD);
821         r = amdgpu_ring_alloc(ring, 3);
822         if (r)
823                 goto error_free_scratch;
824
825         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
826         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
827         amdgpu_ring_write(ring, 0xDEADBEEF);
828         amdgpu_ring_commit(ring);
829
830         for (i = 0; i < adev->usec_timeout; i++) {
831                 tmp = RREG32(scratch);
832                 if (tmp == 0xDEADBEEF)
833                         break;
834                 udelay(1);
835         }
836
837         if (i >= adev->usec_timeout)
838                 r = -ETIMEDOUT;
839
840 error_free_scratch:
841         amdgpu_gfx_scratch_free(adev, scratch);
842         return r;
843 }
844
845 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
846 {
847         struct amdgpu_device *adev = ring->adev;
848         struct amdgpu_ib ib;
849         struct dma_fence *f = NULL;
850
851         unsigned index;
852         uint64_t gpu_addr;
853         uint32_t tmp;
854         long r;
855
856         r = amdgpu_device_wb_get(adev, &index);
857         if (r)
858                 return r;
859
860         gpu_addr = adev->wb.gpu_addr + (index * 4);
861         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
862         memset(&ib, 0, sizeof(ib));
863         r = amdgpu_ib_get(adev, NULL, 16, &ib);
864         if (r)
865                 goto err1;
866
867         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
868         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
869         ib.ptr[2] = lower_32_bits(gpu_addr);
870         ib.ptr[3] = upper_32_bits(gpu_addr);
871         ib.ptr[4] = 0xDEADBEEF;
872         ib.length_dw = 5;
873
874         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
875         if (r)
876                 goto err2;
877
878         r = dma_fence_wait_timeout(f, false, timeout);
879         if (r == 0) {
880                 r = -ETIMEDOUT;
881                 goto err2;
882         } else if (r < 0) {
883                 goto err2;
884         }
885
886         tmp = adev->wb.wb[index];
887         if (tmp == 0xDEADBEEF)
888                 r = 0;
889         else
890                 r = -EINVAL;
891
892 err2:
893         amdgpu_ib_free(adev, &ib, NULL);
894         dma_fence_put(f);
895 err1:
896         amdgpu_device_wb_free(adev, index);
897         return r;
898 }
899
900
901 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
902 {
903         release_firmware(adev->gfx.pfp_fw);
904         adev->gfx.pfp_fw = NULL;
905         release_firmware(adev->gfx.me_fw);
906         adev->gfx.me_fw = NULL;
907         release_firmware(adev->gfx.ce_fw);
908         adev->gfx.ce_fw = NULL;
909         release_firmware(adev->gfx.rlc_fw);
910         adev->gfx.rlc_fw = NULL;
911         release_firmware(adev->gfx.mec_fw);
912         adev->gfx.mec_fw = NULL;
913         release_firmware(adev->gfx.mec2_fw);
914         adev->gfx.mec2_fw = NULL;
915
916         kfree(adev->gfx.rlc.register_list_format);
917 }
918
919 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
920 {
921         const struct rlc_firmware_header_v2_1 *rlc_hdr;
922
923         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
924         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
925         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
926         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
927         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
928         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
929         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
930         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
931         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
932         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
933         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
934         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
935         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
936         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
937                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
938 }
939
940 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
941 {
942         adev->gfx.me_fw_write_wait = false;
943         adev->gfx.mec_fw_write_wait = false;
944
945         switch (adev->asic_type) {
946         case CHIP_VEGA10:
947                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
948                     (adev->gfx.me_feature_version >= 42) &&
949                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
950                     (adev->gfx.pfp_feature_version >= 42))
951                         adev->gfx.me_fw_write_wait = true;
952
953                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
954                     (adev->gfx.mec_feature_version >= 42))
955                         adev->gfx.mec_fw_write_wait = true;
956                 break;
957         case CHIP_VEGA12:
958                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
959                     (adev->gfx.me_feature_version >= 44) &&
960                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
961                     (adev->gfx.pfp_feature_version >= 44))
962                         adev->gfx.me_fw_write_wait = true;
963
964                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
965                     (adev->gfx.mec_feature_version >= 44))
966                         adev->gfx.mec_fw_write_wait = true;
967                 break;
968         case CHIP_VEGA20:
969                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
970                     (adev->gfx.me_feature_version >= 44) &&
971                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
972                     (adev->gfx.pfp_feature_version >= 44))
973                         adev->gfx.me_fw_write_wait = true;
974
975                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
976                     (adev->gfx.mec_feature_version >= 44))
977                         adev->gfx.mec_fw_write_wait = true;
978                 break;
979         case CHIP_RAVEN:
980                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
981                     (adev->gfx.me_feature_version >= 42) &&
982                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
983                     (adev->gfx.pfp_feature_version >= 42))
984                         adev->gfx.me_fw_write_wait = true;
985
986                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
987                     (adev->gfx.mec_feature_version >= 42))
988                         adev->gfx.mec_fw_write_wait = true;
989                 break;
990         default:
991                 break;
992         }
993 }
994
/*
 * Disable the GFXOFF power feature on original-Raven parts whose RLC
 * firmware does not support it; Vega10/12/20, Raven2 (rev_id >= 0x8) and
 * Picasso (PCI device 0x15d8) are left with the platform default.
 */
static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		break;
	case CHIP_RAVEN:
		/* Raven2 and Picasso: no restriction */
		if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
			break;
		/*
		 * Require an RLC v2.1 image with feature version >= 1 and a
		 * ucode version of 106 or >= 531, excluding 53815.
		 * NOTE(review): the specific version numbers look like
		 * known-good/known-bad firmware releases — confirm against
		 * the published RLC firmware history before changing.
		 */
		if ((adev->gfx.rlc_fw_version != 106 &&
		     adev->gfx.rlc_fw_version < 531) ||
		    (adev->gfx.rlc_fw_version == 53815) ||
		    (adev->gfx.rlc_feature_version < 1) ||
		    !adev->gfx.rlc.is_rlc_v2_1)
			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
		break;
	default:
		break;
	}
}
1016
/*
 * gfx_v9_0_init_cp_gfx_microcode - load and register the CP gfx firmwares
 *
 * Requests and validates the PFP, ME and CE firmware images for
 * @chip_name, recording each image's ucode/feature versions. When
 * firmware is front-door loaded through the PSP, each image is also
 * registered in adev->firmware.ucode[] and its (page-aligned) size added
 * to the total upload size.
 *
 * On any failure all three firmware pointers are released and the error
 * is returned; the error path depends on released pointers being NULL
 * (request_firmware leaves them NULL on failure, and release_firmware(NULL)
 * is a no-op).
 */
static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
					  const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	/* PFP (prefetch parser) */
	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/* ME (micro engine) */
	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/* CE (constant engine) */
	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/* Register the images with the PSP front-door loader */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
	}
	return err;
}
1096
1097 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1098                                           const char *chip_name)
1099 {
1100         char fw_name[30];
1101         int err;
1102         struct amdgpu_firmware_info *info = NULL;
1103         const struct common_firmware_header *header = NULL;
1104         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1105         unsigned int *tmp = NULL;
1106         unsigned int i = 0;
1107         uint16_t version_major;
1108         uint16_t version_minor;
1109         uint32_t smu_version;
1110
1111         /*
1112          * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1113          * instead of picasso_rlc.bin.
1114          * Judgment method:
1115          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1116          *          or revision >= 0xD8 && revision <= 0xDF
1117          * otherwise is PCO FP5
1118          */
1119         if (!strcmp(chip_name, "picasso") &&
1120                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1121                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1122                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1123         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1124                 (smu_version >= 0x41e2b))
1125                 /**
1126                 *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1127                 */
1128                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1129         else
1130                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1131         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1132         if (err)
1133                 goto out;
1134         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
1135         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1136
1137         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1138         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1139         if (version_major == 2 && version_minor == 1)
1140                 adev->gfx.rlc.is_rlc_v2_1 = true;
1141
1142         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1143         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1144         adev->gfx.rlc.save_and_restore_offset =
1145                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1146         adev->gfx.rlc.clear_state_descriptor_offset =
1147                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1148         adev->gfx.rlc.avail_scratch_ram_locations =
1149                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1150         adev->gfx.rlc.reg_restore_list_size =
1151                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1152         adev->gfx.rlc.reg_list_format_start =
1153                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1154         adev->gfx.rlc.reg_list_format_separate_start =
1155                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1156         adev->gfx.rlc.starting_offsets_start =
1157                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1158         adev->gfx.rlc.reg_list_format_size_bytes =
1159                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1160         adev->gfx.rlc.reg_list_size_bytes =
1161                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1162         adev->gfx.rlc.register_list_format =
1163                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1164                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1165         if (!adev->gfx.rlc.register_list_format) {
1166                 err = -ENOMEM;
1167                 goto out;
1168         }
1169
1170         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1171                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1172         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1173                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1174
1175         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1176
1177         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1178                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1179         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1180                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1181
1182         if (adev->gfx.rlc.is_rlc_v2_1)
1183                 gfx_v9_0_init_rlc_ext_microcode(adev);
1184
1185         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1186                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1187                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1188                 info->fw = adev->gfx.rlc_fw;
1189                 header = (const struct common_firmware_header *)info->fw->data;
1190                 adev->firmware.fw_size +=
1191                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1192
1193                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1194                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1195                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1196                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1197                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1198                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1199                         info->fw = adev->gfx.rlc_fw;
1200                         adev->firmware.fw_size +=
1201                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1202
1203                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1204                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1205                         info->fw = adev->gfx.rlc_fw;
1206                         adev->firmware.fw_size +=
1207                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1208
1209                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1210                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1211                         info->fw = adev->gfx.rlc_fw;
1212                         adev->firmware.fw_size +=
1213                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1214                 }
1215         }
1216
1217 out:
1218         if (err) {
1219                 dev_err(adev->dev,
1220                         "gfx9: Failed to load firmware \"%s\"\n",
1221                         fw_name);
1222                 release_firmware(adev->gfx.rlc_fw);
1223                 adev->gfx.rlc_fw = NULL;
1224         }
1225         return err;
1226 }
1227
/*
 * gfx_v9_0_init_cp_compute_microcode - load the compute (MEC) microcode
 * @adev: amdgpu device pointer
 * @chip_name: ucode file name prefix (e.g. "vega10")
 *
 * Requests and validates <chip>_mec.bin (mandatory) and <chip>_mec2.bin
 * (optional - its absence is not an error), caches the version/feature
 * fields, and, when firmware is loaded through PSP, registers the MEC ucode
 * and jump-table segments in adev->firmware.ucode[] while growing
 * adev->firmware.fw_size.
 *
 * Returns 0 on success or a negative error code; on failure both firmware
 * handles are released and NULLed.
 */
static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
					  const char *chip_name)
{
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);


	/* MEC2 firmware is optional: a failed request falls back to
	 * MEC1-only operation instead of failing the whole init. */
	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
		le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
		le32_to_cpu(cp_hdr->ucode_feature_version);
	} else {
		/* request failed: not fatal, just record MEC2 as absent */
		err = 0;
		adev->gfx.mec2_fw = NULL;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		/* MEC1 is split into the ucode proper (minus the jump table)
		 * and a separate jump-table segment, each page-aligned. */
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			/* same ucode/JT split for MEC2 */
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

			/* TODO: Determine if MEC2 JT FW loading can be removed
				 for all GFX V9 asic and above */
			if (adev->asic_type != CHIP_ARCTURUS) {
				info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
				info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
				info->fw = adev->gfx.mec2_fw;
				adev->firmware.fw_size +=
					ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
					PAGE_SIZE);
			}
		}
	}

out:
	/* these re-evaluate device state from the loaded fw versions and run
	 * on both the success and the error path */
	gfx_v9_0_check_if_need_gfxoff(adev);
	gfx_v9_0_check_fw_write_wait(adev);
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}
1317
1318 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1319 {
1320         const char *chip_name;
1321         int r;
1322
1323         DRM_DEBUG("\n");
1324
1325         switch (adev->asic_type) {
1326         case CHIP_VEGA10:
1327                 chip_name = "vega10";
1328                 break;
1329         case CHIP_VEGA12:
1330                 chip_name = "vega12";
1331                 break;
1332         case CHIP_VEGA20:
1333                 chip_name = "vega20";
1334                 break;
1335         case CHIP_RAVEN:
1336                 if (adev->rev_id >= 8)
1337                         chip_name = "raven2";
1338                 else if (adev->pdev->device == 0x15d8)
1339                         chip_name = "picasso";
1340                 else
1341                         chip_name = "raven";
1342                 break;
1343         case CHIP_ARCTURUS:
1344                 chip_name = "arcturus";
1345                 break;
1346         default:
1347                 BUG();
1348         }
1349
1350         /* No CPG in Arcturus */
1351         if (adev->asic_type != CHIP_ARCTURUS) {
1352                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1353                 if (r)
1354                         return r;
1355         }
1356
1357         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1358         if (r)
1359                 return r;
1360
1361         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1362         if (r)
1363                 return r;
1364
1365         return r;
1366 }
1367
1368 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1369 {
1370         u32 count = 0;
1371         const struct cs_section_def *sect = NULL;
1372         const struct cs_extent_def *ext = NULL;
1373
1374         /* begin clear state */
1375         count += 2;
1376         /* context control state */
1377         count += 3;
1378
1379         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1380                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1381                         if (sect->id == SECT_CONTEXT)
1382                                 count += 2 + ext->reg_count;
1383                         else
1384                                 return 0;
1385                 }
1386         }
1387
1388         /* end clear state */
1389         count += 2;
1390         /* clear state */
1391         count += 2;
1392
1393         return count;
1394 }
1395
1396 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1397                                     volatile u32 *buffer)
1398 {
1399         u32 count = 0, i;
1400         const struct cs_section_def *sect = NULL;
1401         const struct cs_extent_def *ext = NULL;
1402
1403         if (adev->gfx.rlc.cs_data == NULL)
1404                 return;
1405         if (buffer == NULL)
1406                 return;
1407
1408         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1409         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1410
1411         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1412         buffer[count++] = cpu_to_le32(0x80000000);
1413         buffer[count++] = cpu_to_le32(0x80000000);
1414
1415         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1416                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1417                         if (sect->id == SECT_CONTEXT) {
1418                                 buffer[count++] =
1419                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1420                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1421                                                 PACKET3_SET_CONTEXT_REG_START);
1422                                 for (i = 0; i < ext->reg_count; i++)
1423                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1424                         } else {
1425                                 return;
1426                         }
1427                 }
1428         }
1429
1430         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1431         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1432
1433         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1434         buffer[count++] = cpu_to_le32(0);
1435 }
1436
/*
 * gfx_v9_0_init_always_on_cu_mask - program the always-on CU masks
 * @adev: amdgpu device pointer
 *
 * For every shader engine / shader array, walks the CU bitmap and builds a
 * mask of the first always_on_cu_num active CUs (4 on APUs, 8 on Vega12,
 * 12 otherwise). Once pg_always_on_cu_num (2) active CUs have been
 * collected, that partial mask is written to RLC_PG_ALWAYS_ON_CU_MASK; the
 * full mask goes to RLC_LB_ALWAYS_ACTIVE_CU_MASK and is mirrored into
 * cu_info->ao_cu_bitmap. Holds grbm_idx_mutex for the per-SE/SH register
 * targeting.
 */
static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
{
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	uint32_t pg_always_on_cu_num = 2;
	uint32_t always_on_cu_num;
	uint32_t i, j, k;
	uint32_t mask, cu_bitmap, counter;

	if (adev->flags & AMD_IS_APU)
		always_on_cu_num = 4;
	else if (adev->asic_type == CHIP_VEGA12)
		always_on_cu_num = 8;
	else
		always_on_cu_num = 12;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			cu_bitmap = 0;
			counter = 0;
			/* direct subsequent register writes at this SE/SH */
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (cu_info->bitmap[i][j] & mask) {
					/* PG mask holds only the first
					 * pg_always_on_cu_num active CUs */
					if (counter == pg_always_on_cu_num)
						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
					if (counter < always_on_cu_num)
						cu_bitmap |= mask;
					else
						break;
					counter++;
				}
				mask <<= 1;
			}

			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
		}
	}
	/* back to broadcast mode */
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
1480
/*
 * gfx_v9_0_init_lbpw - configure RLC load-balance-per-watt
 * @adev: amdgpu device pointer
 *
 * Programs the RLC load-balancing threshold, counter and sampling registers
 * with values used for Raven (see gfx_v9_0_rlc_init()), then programs the
 * always-on CU masks. The feature itself is switched on/off separately via
 * gfx_v9_0_enable_lbpw().
 */
static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0, keep the low half-word */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}
1529
/*
 * gfx_v9_4_init_lbpw - configure RLC load-balance-per-watt (Vega20 variant)
 * @adev: amdgpu device pointer
 *
 * Same flow as gfx_v9_0_init_lbpw() but with threshold/counter values used
 * for Vega20 (see gfx_v9_0_rlc_init()), then programs the always-on CU
 * masks.
 */
static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0, keep the low half-word */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}
1578
/* Turn the RLC load-balance-per-watt feature on or off by toggling the
 * LOAD_BALANCE_ENABLE field of RLC_LB_CNTL. */
static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
}
1583
/* Number of CP jump tables stored in the RLC cp_table on gfx v9.
 * NOTE(review): presumably one per CP engine; the value matches the
 * "96 * 5 * 4" jump-table sizing in gfx_v9_0_rlc_init() -- confirm before
 * relying on the per-engine interpretation. */
static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	return 5;
}
1588
1589 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1590 {
1591         const struct cs_section_def *cs_data;
1592         int r;
1593
1594         adev->gfx.rlc.cs_data = gfx9_cs_data;
1595
1596         cs_data = adev->gfx.rlc.cs_data;
1597
1598         if (cs_data) {
1599                 /* init clear state block */
1600                 r = amdgpu_gfx_rlc_init_csb(adev);
1601                 if (r)
1602                         return r;
1603         }
1604
1605         if (adev->asic_type == CHIP_RAVEN) {
1606                 /* TODO: double check the cp_table_size for RV */
1607                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1608                 r = amdgpu_gfx_rlc_init_cpt(adev);
1609                 if (r)
1610                         return r;
1611         }
1612
1613         switch (adev->asic_type) {
1614         case CHIP_RAVEN:
1615                 gfx_v9_0_init_lbpw(adev);
1616                 break;
1617         case CHIP_VEGA20:
1618                 gfx_v9_4_init_lbpw(adev);
1619                 break;
1620         default:
1621                 break;
1622         }
1623
1624         return 0;
1625 }
1626
1627 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1628 {
1629         int r;
1630
1631         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1632         if (unlikely(r != 0))
1633                 return r;
1634
1635         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1636                         AMDGPU_GEM_DOMAIN_VRAM);
1637         if (!r)
1638                 adev->gfx.rlc.clear_state_gpu_addr =
1639                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1640
1641         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1642
1643         return r;
1644 }
1645
1646 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1647 {
1648         int r;
1649
1650         if (!adev->gfx.rlc.clear_state_obj)
1651                 return;
1652
1653         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1654         if (likely(r == 0)) {
1655                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1656                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1657         }
1658 }
1659
/* Free the MEC HPD/EOP buffer and the MEC firmware BO created by
 * gfx_v9_0_mec_init(); also used by mec_init itself to clean up after a
 * partial failure. */
static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
}
1665
1666 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1667 {
1668         int r;
1669         u32 *hpd;
1670         const __le32 *fw_data;
1671         unsigned fw_size;
1672         u32 *fw;
1673         size_t mec_hpd_size;
1674
1675         const struct gfx_firmware_header_v1_0 *mec_hdr;
1676
1677         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1678
1679         /* take ownership of the relevant compute queues */
1680         amdgpu_gfx_compute_queue_acquire(adev);
1681         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1682
1683         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1684                                       AMDGPU_GEM_DOMAIN_VRAM,
1685                                       &adev->gfx.mec.hpd_eop_obj,
1686                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1687                                       (void **)&hpd);
1688         if (r) {
1689                 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1690                 gfx_v9_0_mec_fini(adev);
1691                 return r;
1692         }
1693
1694         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1695
1696         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1697         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1698
1699         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1700
1701         fw_data = (const __le32 *)
1702                 (adev->gfx.mec_fw->data +
1703                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1704         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1705
1706         r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1707                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1708                                       &adev->gfx.mec.mec_fw_obj,
1709                                       &adev->gfx.mec.mec_fw_gpu_addr,
1710                                       (void **)&fw);
1711         if (r) {
1712                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1713                 gfx_v9_0_mec_fini(adev);
1714                 return r;
1715         }
1716
1717         memcpy(fw, fw_data, fw_size);
1718
1719         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1720         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1721
1722         return 0;
1723 }
1724
/* Read a single SQ indirect register for the given simd/wave: program
 * SQ_IND_INDEX with the target (FORCE_READ set) and fetch the value from
 * SQ_IND_DATA. Caller is expected to have selected the SE/SH/CU first. */
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}
1734
/* Bulk-read @num consecutive SQ indirect registers starting at @regno for
 * the given simd/wave/thread into @out. AUTO_INCR makes the hardware
 * advance the index after each SQ_IND_DATA read, so only one SQ_IND_INDEX
 * write is needed. */
static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}
1749
1750 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1751 {
1752         /* type 1 wave data */
1753         dst[(*no_fields)++] = 1;
1754         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1755         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1756         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1757         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1758         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1759         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1760         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1761         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1762         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1763         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1764         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1765         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1766         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1767         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1768 }
1769
/* Read @size SGPRs starting at @start for the given simd/wave into @dst,
 * via the SQ indirect SGPR window (thread id 0). */
static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}
1778
/* Read @size VGPRs starting at @start for the given simd/wave/thread into
 * @dst, via the SQ indirect VGPR window. */
static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t thread,
				     uint32_t start, uint32_t size,
				     uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}
1788
/* Thin wrapper routing the ME/pipe/queue/vmid selection to
 * soc15_grbm_select(); exposed through gfx_v9_0_gfx_funcs. */
static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
				  u32 me, u32 pipe, u32 q, u32 vm)
{
	soc15_grbm_select(adev, me, pipe, q, vm);
}
1794
/* GFX v9 callback table installed into adev->gfx.funcs by
 * gfx_v9_0_gpu_early_init(): clock query, SE/SH selection, wave-state
 * debug readers and RAS error hooks. */
static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v9_0_select_se_sh,
	.read_wave_data = &gfx_v9_0_read_wave_data,
	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
	.ras_error_inject = &gfx_v9_0_ras_error_inject,
	.query_ras_error_count = &gfx_v9_0_query_ras_error_count
};
1805
1806 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1807 {
1808         u32 gb_addr_config;
1809         int err;
1810
1811         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1812
1813         switch (adev->asic_type) {
1814         case CHIP_VEGA10:
1815                 adev->gfx.config.max_hw_contexts = 8;
1816                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1817                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1818                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1819                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1820                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1821                 break;
1822         case CHIP_VEGA12:
1823                 adev->gfx.config.max_hw_contexts = 8;
1824                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1825                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1826                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1827                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1828                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1829                 DRM_INFO("fix gfx.config for vega12\n");
1830                 break;
1831         case CHIP_VEGA20:
1832                 adev->gfx.config.max_hw_contexts = 8;
1833                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1834                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1835                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1836                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1837                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1838                 gb_addr_config &= ~0xf3e777ff;
1839                 gb_addr_config |= 0x22014042;
1840                 /* check vbios table if gpu info is not available */
1841                 err = amdgpu_atomfirmware_get_gfx_info(adev);
1842                 if (err)
1843                         return err;
1844                 break;
1845         case CHIP_RAVEN:
1846                 adev->gfx.config.max_hw_contexts = 8;
1847                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1848                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1849                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1850                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1851                 if (adev->rev_id >= 8)
1852                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1853                 else
1854                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1855                 break;
1856         case CHIP_ARCTURUS:
1857                 adev->gfx.config.max_hw_contexts = 8;
1858                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1859                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1860                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1861                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1862                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1863                 gb_addr_config &= ~0xf3e777ff;
1864                 gb_addr_config |= 0x22014042;
1865                 break;
1866         default:
1867                 BUG();
1868                 break;
1869         }
1870
1871         adev->gfx.config.gb_addr_config = gb_addr_config;
1872
1873         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1874                         REG_GET_FIELD(
1875                                         adev->gfx.config.gb_addr_config,
1876                                         GB_ADDR_CONFIG,
1877                                         NUM_PIPES);
1878
1879         adev->gfx.config.max_tile_pipes =
1880                 adev->gfx.config.gb_addr_config_fields.num_pipes;
1881
1882         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1883                         REG_GET_FIELD(
1884                                         adev->gfx.config.gb_addr_config,
1885                                         GB_ADDR_CONFIG,
1886                                         NUM_BANKS);
1887         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1888                         REG_GET_FIELD(
1889                                         adev->gfx.config.gb_addr_config,
1890                                         GB_ADDR_CONFIG,
1891                                         MAX_COMPRESSED_FRAGS);
1892         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1893                         REG_GET_FIELD(
1894                                         adev->gfx.config.gb_addr_config,
1895                                         GB_ADDR_CONFIG,
1896                                         NUM_RB_PER_SE);
1897         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1898                         REG_GET_FIELD(
1899                                         adev->gfx.config.gb_addr_config,
1900                                         GB_ADDR_CONFIG,
1901                                         NUM_SHADER_ENGINES);
1902         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1903                         REG_GET_FIELD(
1904                                         adev->gfx.config.gb_addr_config,
1905                                         GB_ADDR_CONFIG,
1906                                         PIPE_INTERLEAVE_SIZE));
1907
1908         return 0;
1909 }
1910
1911 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1912                                    struct amdgpu_ngg_buf *ngg_buf,
1913                                    int size_se,
1914                                    int default_size_se)
1915 {
1916         int r;
1917
1918         if (size_se < 0) {
1919                 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1920                 return -EINVAL;
1921         }
1922         size_se = size_se ? size_se : default_size_se;
1923
1924         ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1925         r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1926                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1927                                     &ngg_buf->bo,
1928                                     &ngg_buf->gpu_addr,
1929                                     NULL);
1930         if (r) {
1931                 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1932                 return r;
1933         }
1934         ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1935
1936         return r;
1937 }
1938
1939 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1940 {
1941         int i;
1942
1943         for (i = 0; i < NGG_BUF_MAX; i++)
1944                 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1945                                       &adev->gfx.ngg.buf[i].gpu_addr,
1946                                       NULL);
1947
1948         memset(&adev->gfx.ngg.buf[0], 0,
1949                         sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1950
1951         adev->gfx.ngg.init = false;
1952
1953         return 0;
1954 }
1955
1956 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1957 {
1958         int r;
1959
1960         if (!amdgpu_ngg || adev->gfx.ngg.init == true)
1961                 return 0;
1962
1963         /* GDS reserve memory: 64 bytes alignment */
1964         adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1965         adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1966         adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1967         adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1968
1969         /* Primitive Buffer */
1970         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1971                                     amdgpu_prim_buf_per_se,
1972                                     64 * 1024);
1973         if (r) {
1974                 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1975                 goto err;
1976         }
1977
1978         /* Position Buffer */
1979         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1980                                     amdgpu_pos_buf_per_se,
1981                                     256 * 1024);
1982         if (r) {
1983                 dev_err(adev->dev, "Failed to create Position Buffer\n");
1984                 goto err;
1985         }
1986
1987         /* Control Sideband */
1988         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1989                                     amdgpu_cntl_sb_buf_per_se,
1990                                     256);
1991         if (r) {
1992                 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1993                 goto err;
1994         }
1995
1996         /* Parameter Cache, not created by default */
1997         if (amdgpu_param_buf_per_se <= 0)
1998                 goto out;
1999
2000         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
2001                                     amdgpu_param_buf_per_se,
2002                                     512 * 1024);
2003         if (r) {
2004                 dev_err(adev->dev, "Failed to create Parameter Cache\n");
2005                 goto err;
2006         }
2007
2008 out:
2009         adev->gfx.ngg.init = true;
2010         return 0;
2011 err:
2012         gfx_v9_0_ngg_fini(adev);
2013         return r;
2014 }
2015
/*
 * gfx_v9_0_ngg_en - program the NGG buffers into the WD block
 *
 * Writes the sizes and GPU addresses of the four NGG buffers allocated
 * by gfx_v9_0_ngg_init() into the WD_* registers, then submits a short
 * packet stream on gfx ring 0 that zero-fills the GDS area reserved for
 * NGG.  Returns 0 on success, or a negative error code if the ring
 * could not be reserved.
 */
static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	int r;
	u32 data, base;

	if (!amdgpu_ngg)
		return 0;

	/* Program buffer size; the >> 8 shifts suggest 256-byte units for
	 * prim/pos/cntl and >> 10 suggests 1KB units for the parameter
	 * cache — TODO confirm against the register spec. */
	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);

	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);

	/* Program buffer base address (low/high 32 bits per buffer) */
	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);

	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);

	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);

	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);

	/* Clear GDS reserved memory */
	r = amdgpu_ring_alloc(ring, 17);
	if (r) {
		DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
			  ring->name, r);
		return r;
	}

	/* expose the reserved area by writing gds_size + reserve_size
	 * to GDS_VMID0_SIZE before the fill below */
	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
				   (adev->gds.gds_size +
				    adev->gfx.ngg.gds_reserve_size));

	/* DMA_DATA: fill gds_reserve_size bytes at gds_reserve_addr with
	 * zeros (DST_SEL(1)/SRC_SEL(2) presumably select GDS destination
	 * and data-fill source — confirm against the PM4 packet spec) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
				PACKET3_DMA_DATA_DST_SEL(1) |
				PACKET3_DMA_DATA_SRC_SEL(2)));
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
				adev->gfx.ngg.gds_reserve_size);

	/* NOTE(review): this sets GDS_VMID0_SIZE to 0 rather than
	 * restoring adev->gds.gds_size — looks intentional for VMID0
	 * but worth confirming */
	gfx_v9_0_write_data_to_reg(ring, 0, false,
				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);

	amdgpu_ring_commit(ring);

	return 0;
}
2094
2095 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2096                                       int mec, int pipe, int queue)
2097 {
2098         int r;
2099         unsigned irq_type;
2100         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2101
2102         ring = &adev->gfx.compute_ring[ring_id];
2103
2104         /* mec0 is me1 */
2105         ring->me = mec + 1;
2106         ring->pipe = pipe;
2107         ring->queue = queue;
2108
2109         ring->ring_obj = NULL;
2110         ring->use_doorbell = true;
2111         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2112         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2113                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2114         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2115
2116         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2117                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2118                 + ring->pipe;
2119
2120         /* type-2 packets are deprecated on MEC, use type-3 instead */
2121         r = amdgpu_ring_init(adev, ring, 1024,
2122                              &adev->gfx.eop_irq, irq_type);
2123         if (r)
2124                 return r;
2125
2126
2127         return 0;
2128 }
2129
/*
 * gfx_v9_0_sw_init - software-side initialization of the GFX v9 IP block
 *
 * Registers the CP interrupt sources, loads microcode, allocates the
 * RLC and MEC backing objects, creates the gfx ring(s), all enabled
 * compute rings, the KIQ ring and the per-queue MQDs, then runs early
 * GPU and NGG setup.
 *
 * Returns 0 on success or a negative error code; partially created
 * state is cleaned up by gfx_v9_0_sw_fini().
 */
static int gfx_v9_0_sw_init(void *handle)
{
	int i, j, k, r, ring_id;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* number of MECs depends on the ASIC */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
	case CHIP_ARCTURUS:
		adev->gfx.mec.num_mec = 2;
		break;
	default:
		adev->gfx.mec.num_mec = 1;
		break;
	}

	adev->gfx.mec.num_pipe_per_mec = 4;
	adev->gfx.mec.num_queue_per_pipe = 8;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	/* ECC error */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;

	/* FUE error — deliberately shares the irq source struct with the
	 * ECC error registered above */
	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
			      &adev->gfx.cp_ecc_error_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v9_0_scratch_init(adev);

	r = gfx_v9_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = adev->gfx.rlc.funcs->init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v9_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		if (!i)
			sprintf(ring->name, "gfx");
		else
			sprintf(ring->name, "gfx_%d", i);
		ring->use_doorbell = true;
		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
				/* only create rings for queues the scheduler enabled */
				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
					continue;

				r = gfx_v9_0_compute_ring_init(adev,
							       ring_id,
							       i, k, j);
				if (r)
					return r;

				ring_id++;
			}
		}
	}

	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	kiq = &adev->gfx.kiq;
	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* create MQD for all compute queues as well as KIQ for SRIOV case */
	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v9_0_gpu_early_init(adev);
	if (r)
		return r;

	r = gfx_v9_0_ngg_init(adev);
	if (r)
		return r;

	return 0;
}
2267
2268
/*
 * gfx_v9_0_sw_fini - tear down everything created by gfx_v9_0_sw_init
 *
 * Unwinds gfx RAS bookkeeping (when enabled), ring objects, MQDs, KIQ,
 * MEC and NGG buffers, the clear-state object, the RAVEN cp table and
 * finally the loaded microcode.  Always returns 0.
 */
static int gfx_v9_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* remove debugfs/sysfs nodes and the interrupt handler, disable
	 * the RAS feature, then drop the ras_if allocation */
	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
			adev->gfx.ras_if) {
		struct ras_common_if *ras_if = adev->gfx.ras_if;
		struct ras_ih_if ih_info = {
			.head = *ras_if,
		};

		amdgpu_ras_debugfs_remove(adev, ras_if);
		amdgpu_ras_sysfs_remove(adev, ras_if);
		amdgpu_ras_interrupt_remove_handler(adev,  &ih_info);
		amdgpu_ras_feature_enable(adev, ras_if, 0);
		kfree(ras_if);
	}

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	amdgpu_gfx_mqd_sw_fini(adev);
	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
	amdgpu_gfx_kiq_fini(adev);

	gfx_v9_0_mec_fini(adev);
	gfx_v9_0_ngg_fini(adev);
	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
	if (adev->asic_type == CHIP_RAVEN) {
		/* the cp table buffer is freed only on RAVEN here; it is
		 * presumably allocated in rlc init — confirm there */
		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
				&adev->gfx.rlc.cp_table_gpu_addr,
				(void **)&adev->gfx.rlc.cp_table_ptr);
	}
	gfx_v9_0_free_microcode(adev);

	return 0;
}
2309
2310
/* Tiling mode table programming — intentionally empty for gfx v9. */
static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	/* TODO */
}
2315
2316 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2317 {
2318         u32 data;
2319
2320         if (instance == 0xffffffff)
2321                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2322         else
2323                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2324
2325         if (se_num == 0xffffffff)
2326                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2327         else
2328                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2329
2330         if (sh_num == 0xffffffff)
2331                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2332         else
2333                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2334
2335         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2336 }
2337
2338 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2339 {
2340         u32 data, mask;
2341
2342         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2343         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2344
2345         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2346         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2347
2348         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2349                                          adev->gfx.config.max_sh_per_se);
2350
2351         return (~data) & mask;
2352 }
2353
2354 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2355 {
2356         int i, j;
2357         u32 data;
2358         u32 active_rbs = 0;
2359         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2360                                         adev->gfx.config.max_sh_per_se;
2361
2362         mutex_lock(&adev->grbm_idx_mutex);
2363         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2364                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2365                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2366                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2367                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2368                                                rb_bitmap_width_per_sh);
2369                 }
2370         }
2371         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2372         mutex_unlock(&adev->grbm_idx_mutex);
2373
2374         adev->gfx.config.backend_enable_mask = active_rbs;
2375         adev->gfx.config.num_rbs = hweight32(active_rbs);
2376 }
2377
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
/*
 * gfx_v9_0_init_compute_vmid - program SH_MEM state for compute VMIDs
 *
 * For every compute VMID (8..15) this programs SH_MEM_CONFIG and the
 * aperture bases, then zeroes each VMID's GDS/GWS/OA allocations.
 */
static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;

	/* srbm_mutex serializes grbm_select-based per-VMID register access */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		soc15_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
	}
	/* back to VMID 0 */
	soc15_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Initialize all compute VMIDs to have no GDS, GWS, or OA
	   access. These should be enabled by FW for target VMIDs. */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
	}
}
2418
/*
 * gfx_v9_0_init_gds_vmid - clear GDS/GWS/OA allocations for VMIDs 1-15
 */
static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
{
	int vmid;

	/*
	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
	 * the driver can enable them for graphics. VMID0 should maintain
	 * access so that HWS firmware can save/restore entries.
	 */
	for (vmid = 1; vmid < 16; vmid++) {
		/* BASE/SIZE registers are paired, hence the 2 * vmid stride */
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
	}
}
2436
/*
 * gfx_v9_0_constants_init - program baseline constant state
 *
 * Sets the GRBM read timeout, discovers active RBs and CUs, and
 * programs SH_MEM_CONFIG/SH_MEM_BASES for every GFXHUB VMID before
 * delegating compute/GDS VMID setup to the helpers above.
 */
static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);

	gfx_v9_0_tiling_mode_table_init(adev);

	gfx_v9_0_setup_rb(adev);
	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
		soc15_grbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0: aperture bases stay at 0 */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
					    !!amdgpu_noretry);
			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
		} else {
			/* other VMIDs also get the private/shared aperture
			 * bases (top 16 bits of the 64-bit addresses) */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
					    !!amdgpu_noretry);
			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
				(adev->gmc.private_aperture_start >> 48));
			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
				(adev->gmc.shared_aperture_start >> 48));
			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
		}
	}
	soc15_grbm_select(adev, 0, 0, 0, 0);

	mutex_unlock(&adev->srbm_mutex);

	gfx_v9_0_init_compute_vmid(adev);
	gfx_v9_0_init_gds_vmid(adev);
}
2483
/*
 * gfx_v9_0_wait_for_rlc_serdes - wait for the RLC serdes units to idle
 *
 * Polls RLC_SERDES_CU_MASTER_BUSY for every SE/SH combination, waiting
 * up to adev->usec_timeout microseconds per combination, then polls the
 * non-CU master busy bits.  Logs and returns early on a per-CU timeout.
 */
static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	/* per-SE/SH polling uses GRBM_GFX_INDEX, so hold grbm_idx_mutex */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				/* restore broadcast mode before bailing out */
				gfx_v9_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* now wait for the non-CU serdes masters (SE/GC/TC0/TC1) */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
2521
2522 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2523                                                bool enable)
2524 {
2525         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2526
2527         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2528         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2529         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2530         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2531
2532         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2533 }
2534
/*
 * gfx_v9_0_init_csb - point the RLC at the clear-state buffer
 *
 * Programs the clear-state indirect buffer (CSIB) address and length
 * registers from the buffer set up in adev->gfx.rlc.
 */
static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	/* low dword with the bottom two bits masked off */
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
			adev->gfx.rlc.clear_state_size);
}
2545
/*
 * gfx_v9_1_parse_ind_reg_list - scan the RLC indirect register list
 *
 * Walks @register_list_format from @indirect_offset to @list_size,
 * recording where each indirect run starts in @indirect_start_offsets
 * (bumping *@indirect_start_offsets_count) and collecting each distinct
 * indirect register into @unique_indirect_regs.  Runs are sequences of
 * 3-word records terminated by a 0xFFFFFFFF sentinel; the register id
 * sits in the third word of each record.
 */
static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
				int indirect_offset,
				int list_size,
				int *unique_indirect_regs,
				int unique_indirect_reg_count,
				int *indirect_start_offsets,
				int *indirect_start_offsets_count,
				int max_start_offsets_count)
{
	int i;

	while (indirect_offset < list_size) {
		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
		indirect_start_offsets[(*indirect_start_offsets_count)++] =
			indirect_offset;

		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
			indirect_offset += 2;

			/* find the matching entry, or the first free slot */
			for (i = 0; i < unique_indirect_reg_count; i++)
				if (unique_indirect_regs[i] ==
					register_list_format[indirect_offset] ||
					!unique_indirect_regs[i])
					break;

			BUG_ON(i >= unique_indirect_reg_count);

			if (!unique_indirect_regs[i])
				unique_indirect_regs[i] =
					register_list_format[indirect_offset];

			indirect_offset++;
		}

		/* step past the 0xFFFFFFFF run terminator */
		indirect_offset++;
	}
}
2582
2583 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2584 {
2585         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2586         int unique_indirect_reg_count = 0;
2587
2588         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2589         int indirect_start_offsets_count = 0;
2590
2591         int list_size = 0;
2592         int i = 0, j = 0;
2593         u32 tmp = 0;
2594
2595         u32 *register_list_format =
2596                 kmemdup(adev->gfx.rlc.register_list_format,
2597                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2598         if (!register_list_format)
2599                 return -ENOMEM;
2600
2601         /* setup unique_indirect_regs array and indirect_start_offsets array */
2602         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2603         gfx_v9_1_parse_ind_reg_list(register_list_format,
2604                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2605                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2606                                     unique_indirect_regs,
2607                                     unique_indirect_reg_count,
2608                                     indirect_start_offsets,
2609                                     &indirect_start_offsets_count,
2610                                     ARRAY_SIZE(indirect_start_offsets));
2611
2612         /* enable auto inc in case it is disabled */
2613         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2614         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2615         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2616
2617         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2618         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2619                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2620         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2621                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2622                         adev->gfx.rlc.register_restore[i]);
2623
2624         /* load indirect register */
2625         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2626                 adev->gfx.rlc.reg_list_format_start);
2627
2628         /* direct register portion */
2629         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2630                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2631                         register_list_format[i]);
2632
2633         /* indirect register portion */
2634         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2635                 if (register_list_format[i] == 0xFFFFFFFF) {
2636                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2637                         continue;
2638                 }
2639
2640                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2641                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2642
2643                 for (j = 0; j < unique_indirect_reg_count; j++) {
2644                         if (register_list_format[i] == unique_indirect_regs[j]) {
2645                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2646                                 break;
2647                         }
2648                 }
2649
2650                 BUG_ON(j >= unique_indirect_reg_count);
2651
2652                 i++;
2653         }
2654
2655         /* set save/restore list size */
2656         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2657         list_size = list_size >> 1;
2658         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2659                 adev->gfx.rlc.reg_restore_list_size);
2660         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2661
2662         /* write the starting offsets to RLC scratch ram */
2663         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2664                 adev->gfx.rlc.starting_offsets_start);
2665         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2666                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2667                        indirect_start_offsets[i]);
2668
2669         /* load unique indirect regs*/
2670         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2671                 if (unique_indirect_regs[i] != 0) {
2672                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2673                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2674                                unique_indirect_regs[i] & 0x3FFFF);
2675
2676                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2677                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2678                                unique_indirect_regs[i] >> 20);
2679                 }
2680         }
2681
2682         kfree(register_list_format);
2683         return 0;
2684 }
2685
/* Turn on the RLC save/restore machine (SRM) so the lists programmed by
 * gfx_v9_1_init_rlc_save_restore_list() take effect.
 */
static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
}
2690
2691 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2692                                              bool enable)
2693 {
2694         uint32_t data = 0;
2695         uint32_t default_data = 0;
2696
2697         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2698         if (enable == true) {
2699                 /* enable GFXIP control over CGPG */
2700                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2701                 if(default_data != data)
2702                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2703
2704                 /* update status */
2705                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2706                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2707                 if(default_data != data)
2708                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2709         } else {
2710                 /* restore GFXIP control over GCPG */
2711                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2712                 if(default_data != data)
2713                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2714         }
2715 }
2716
/*
 * gfx_v9_0_init_gfx_power_gating - one-time GFX power-gating register setup
 *
 * Only runs when static (PG), SMG or DMG gating is supported.  Programs the
 * CP idle poll count, the RLC power up/down/propagate/mem-sleep delays, the
 * serdes command delay, the CGCG-before-CGPG delay and the auto-PG idle
 * threshold, then hands CGPG control to the GFXIP.
 */
static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
{
	uint32_t data = 0;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG)) {
		/* init IDLE_POLL_COUNT = 0x60 (hex, i.e. 96 cycles) */
		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);

		/* init RLC PG Delay */
		data = 0;
		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);

		/* delay between serdes commands */
		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);

		/* how long CGCG must be active before CGPG may engage */
		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);

		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;

		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);

		pwr_10_0_gfxip_control_over_cgpg(adev, true);
	}
}
2758
2759 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2760                                                 bool enable)
2761 {
2762         uint32_t data = 0;
2763         uint32_t default_data = 0;
2764
2765         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2766         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2767                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2768                              enable ? 1 : 0);
2769         if (default_data != data)
2770                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2771 }
2772
2773 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2774                                                 bool enable)
2775 {
2776         uint32_t data = 0;
2777         uint32_t default_data = 0;
2778
2779         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2780         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2781                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2782                              enable ? 1 : 0);
2783         if(default_data != data)
2784                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2785 }
2786
2787 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2788                                         bool enable)
2789 {
2790         uint32_t data = 0;
2791         uint32_t default_data = 0;
2792
2793         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2794         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2795                              CP_PG_DISABLE,
2796                              enable ? 0 : 1);
2797         if(default_data != data)
2798                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2799 }
2800
2801 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2802                                                 bool enable)
2803 {
2804         uint32_t data, default_data;
2805
2806         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2807         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2808                              GFX_POWER_GATING_ENABLE,
2809                              enable ? 1 : 0);
2810         if(default_data != data)
2811                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2812 }
2813
2814 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2815                                                 bool enable)
2816 {
2817         uint32_t data, default_data;
2818
2819         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2820         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2821                              GFX_PIPELINE_PG_ENABLE,
2822                              enable ? 1 : 0);
2823         if(default_data != data)
2824                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2825
2826         if (!enable)
2827                 /* read any GFX register to wake up GFX */
2828                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2829 }
2830
2831 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2832                                                        bool enable)
2833 {
2834         uint32_t data, default_data;
2835
2836         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2837         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2838                              STATIC_PER_CU_PG_ENABLE,
2839                              enable ? 1 : 0);
2840         if(default_data != data)
2841                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2842 }
2843
2844 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2845                                                 bool enable)
2846 {
2847         uint32_t data, default_data;
2848
2849         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2850         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2851                              DYN_PER_CU_PG_ENABLE,
2852                              enable ? 1 : 0);
2853         if(default_data != data)
2854                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2855 }
2856
2857 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2858 {
2859         gfx_v9_0_init_csb(adev);
2860
2861         /*
2862          * Rlc save restore list is workable since v2_1.
2863          * And it's needed by gfxoff feature.
2864          */
2865         if (adev->gfx.rlc.is_rlc_v2_1) {
2866                 gfx_v9_1_init_rlc_save_restore_list(adev);
2867                 gfx_v9_0_enable_save_restore_machine(adev);
2868         }
2869
2870         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2871                               AMD_PG_SUPPORT_GFX_SMG |
2872                               AMD_PG_SUPPORT_GFX_DMG |
2873                               AMD_PG_SUPPORT_CP |
2874                               AMD_PG_SUPPORT_GDS |
2875                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2876                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2877                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2878                 gfx_v9_0_init_gfx_power_gating(adev);
2879         }
2880 }
2881
/* Halt the RLC: disable the F32 core, mask the GUI idle interrupt and wait
 * for in-flight serdes commands to drain.
 * NOTE(review): non-static, unlike its siblings -- presumably referenced
 * from outside this file; confirm against the header prototype.
 */
void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
	gfx_v9_0_wait_for_rlc_serdes(adev);
}
2888
/* Pulse the RLC soft reset: assert, wait 50us, deassert, wait another 50us
 * for the block to settle.
 */
static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);
	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
2896
2897 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2898 {
2899 #ifdef AMDGPU_RLC_DEBUG_RETRY
2900         u32 rlc_ucode_ver;
2901 #endif
2902
2903         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2904         udelay(50);
2905
2906         /* carrizo do enable cp interrupt after cp inited */
2907         if (!(adev->flags & AMD_IS_APU)) {
2908                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2909                 udelay(50);
2910         }
2911
2912 #ifdef AMDGPU_RLC_DEBUG_RETRY
2913         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2914         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2915         if(rlc_ucode_ver == 0x108) {
2916                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
2917                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2918                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2919                  * default is 0x9C4 to create a 100us interval */
2920                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2921                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2922                  * to disable the page fault retry interrupts, default is
2923                  * 0x100 (256) */
2924                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2925         }
2926 #endif
2927 }
2928
2929 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2930 {
2931         const struct rlc_firmware_header_v2_0 *hdr;
2932         const __le32 *fw_data;
2933         unsigned i, fw_size;
2934
2935         if (!adev->gfx.rlc_fw)
2936                 return -EINVAL;
2937
2938         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2939         amdgpu_ucode_print_rlc_hdr(&hdr->header);
2940
2941         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2942                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2943         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2944
2945         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2946                         RLCG_UCODE_LOADING_START_ADDRESS);
2947         for (i = 0; i < fw_size; i++)
2948                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2949         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2950
2951         return 0;
2952 }
2953
2954 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2955 {
2956         int r;